feat: Add MissingFileScanner service with cursor-based batched filesyst…
- "src/services/missing-file-scanner.ts" - "src/__tests__/missing-file-scanner.test.ts" - "src/types/index.ts" - "src/db/schema/content.ts" GSD-Task: S06/T01
This commit is contained in:
parent
c0ac8cadd5
commit
61da729fa4
4 changed files with 380 additions and 1 deletions
207
src/__tests__/missing-file-scanner.test.ts
Normal file
207
src/__tests__/missing-file-scanner.test.ts
Normal file
|
|
@ -0,0 +1,207 @@
|
||||||
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||||
|
import { mkdtempSync, rmSync, existsSync } from 'node:fs';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { initDatabaseAsync, closeDatabase } from '../db/index';
|
||||||
|
import { runMigrations } from '../db/migrate';
|
||||||
|
import { contentItems, systemConfig } from '../db/schema/index';
|
||||||
|
import { eq } from 'drizzle-orm';
|
||||||
|
|
||||||
|
// ── Mock fs/promises.access to control which files "exist" ──
|
||||||
|
/** Paths the mocked `access` should treat as present on disk; cleared between tests. */
const existingFiles = new Set<string>();

// Replace only `access` from node:fs/promises; every other export is passed through.
// The factory closes over `existingFiles` lazily (inside the fake), so it is safe
// even though the mock registration is evaluated before this module's body runs.
vi.mock('node:fs/promises', async (importOriginal) => {
  const realFsPromises = await importOriginal<typeof import('node:fs/promises')>();

  // Resolves for registered paths; rejects with an ENOENT-coded error otherwise.
  const fakeAccess = vi.fn(async (filePath: string) => {
    if (existingFiles.has(filePath)) {
      return;
    }
    const notFound = new Error(`ENOENT: no such file or directory, access '${filePath}'`) as NodeJS.ErrnoException;
    notFound.code = 'ENOENT';
    throw notFound;
  });

  return { ...realFsPromises, access: fakeAccess };
});
|
||||||
|
|
||||||
|
import { MissingFileScanner } from '../services/missing-file-scanner';
|
||||||
|
|
||||||
|
// ── Test Helpers ──
|
||||||
|
|
||||||
|
let tmpDir: string;
|
||||||
|
let db: Awaited<ReturnType<typeof initDatabaseAsync>>;
|
||||||
|
|
||||||
|
/**
 * Create a fresh temp directory, open a database file inside it, and run migrations.
 * Stores the directory and the handle in the module-level `tmpDir` / `db` variables
 * and returns the handle.
 */
async function setupDb() {
  const scratchDir = mkdtempSync(join(tmpdir(), 'tubearr-missing-scan-'));
  tmpDir = scratchDir;

  const sqliteFile = join(scratchDir, 'test.db');
  const handle = await initDatabaseAsync(sqliteFile);
  db = handle;

  await runMigrations(sqliteFile);
  return handle;
}
|
||||||
|
|
||||||
|
/**
 * Per-test teardown: close the database, forget all mocked files, and remove the
 * temp directory. Directory removal is best-effort and never fails the test.
 */
function cleanup() {
  closeDatabase();
  existingFiles.clear();

  try {
    const scratchDirPresent = Boolean(tmpDir) && existsSync(tmpDir);
    if (scratchDirPresent) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  } catch {
    // best-effort
  }
}
|
||||||
|
|
||||||
|
/**
 * Insert one content item and return the inserted row.
 *
 * Defaults: title 'Test Video', status 'downloaded', filePath '/media/test-video.mp4'.
 * The filePath default applies only when the `filePath` key is absent from
 * `overrides`, so an explicit `null` is stored as-is.
 */
async function insertContent(
  overrides: { status?: string; filePath?: string | null; title?: string } = {}
) {
  // Distinct per call so concurrent inserts do not share a platformContentId.
  const uniquePlatformId = `plat-${Date.now()}-${Math.random()}`;
  const resolvedPath = 'filePath' in overrides ? overrides.filePath : '/media/test-video.mp4';

  const inserted = await db
    .insert(contentItems)
    .values({
      title: overrides.title ?? 'Test Video',
      platformContentId: uniquePlatformId,
      url: 'https://example.com/video',
      contentType: 'video',
      status: overrides.status ?? 'downloaded',
      filePath: resolvedPath,
    })
    .returning();

  return inserted[0];
}
|
||||||
|
|
||||||
|
// ── Tests ──
|
||||||
|
|
||||||
|
describe('MissingFileScanner', () => {
  /** Read back the current status column for a single content item. */
  const statusOf = async (id: number) => {
    const found = await db.select().from(contentItems).where(eq(contentItems.id, id));
    return found[0].status;
  };

  /** Build a scanner bound to the per-test database. */
  const newScanner = () => new MissingFileScanner(db);

  beforeEach(async () => {
    await setupDb();
  });

  afterEach(() => {
    cleanup();
  });

  it('returns zero counts when no downloaded items exist', async () => {
    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(0);
    expect(summary.missing).toBe(0);
    expect(summary.duration).toBeGreaterThanOrEqual(0);
  });

  it('does not flag items whose files exist on disk', async () => {
    const presentPath = '/media/exists.mp4';
    const item = await insertContent({ filePath: presentPath });
    existingFiles.add(presentPath);

    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(1);
    expect(summary.missing).toBe(0);

    // The row must be left untouched.
    expect(await statusOf(item.id)).toBe('downloaded');
  });

  it('marks items as missing when file does not exist', async () => {
    // The path is never registered in existingFiles, so the mock reports ENOENT.
    const item = await insertContent({ filePath: '/media/gone.mp4' });

    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(1);
    expect(summary.missing).toBe(1);
    expect(await statusOf(item.id)).toBe('missing');
  });

  it('skips items with non-downloaded status', async () => {
    await insertContent({ status: 'monitored', filePath: '/media/monitored.mp4' });
    await insertContent({ status: 'queued', filePath: '/media/queued.mp4' });
    await insertContent({ status: 'failed', filePath: '/media/failed.mp4' });

    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(0);
    expect(summary.missing).toBe(0);
  });

  it('skips downloaded items with null filePath', async () => {
    await insertContent({ status: 'downloaded', filePath: null });

    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(0);
    expect(summary.missing).toBe(0);
  });

  it('handles mixed batch of existing and missing files', async () => {
    const survivingPath = '/media/a.mp4';
    const items = await Promise.all([
      insertContent({ filePath: '/media/a.mp4', title: 'A' }),
      insertContent({ filePath: '/media/b.mp4', title: 'B' }),
      insertContent({ filePath: '/media/c.mp4', title: 'C' }),
    ]);
    // Only 'a' is present on the mocked disk.
    existingFiles.add(survivingPath);

    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(3);
    expect(summary.missing).toBe(2);

    // Each row ends up in the state matching its file's presence.
    for (const item of items) {
      const expectedStatus = item.filePath === survivingPath ? 'downloaded' : 'missing';
      expect(await statusOf(item.id)).toBe(expectedStatus);
    }
  });

  it('persists scan results to system_config', async () => {
    await insertContent({ filePath: '/media/gone.mp4' });

    const scanner = newScanner();
    await scanner.scanAll();

    const stored = await scanner.getLastScanResult();
    expect(stored).not.toBeNull();
    expect(stored!.lastRun).toBeTruthy();
    expect(stored!.result.checked).toBe(1);
    expect(stored!.result.missing).toBe(1);
    expect(stored!.result.duration).toBeGreaterThanOrEqual(0);
  });

  it('returns null for getLastScanResult when no scan has run', async () => {
    const stored = await newScanner().getLastScanResult();
    expect(stored).toBeNull();
  });

  it('handles batching correctly with > BATCH_SIZE items', async () => {
    // 150 downloaded items, none of which exist on the mocked disk.
    const total = 150;
    const pendingInserts: Array<ReturnType<typeof insertContent>> = [];
    for (let i = 0; i < total; i++) {
      pendingInserts.push(insertContent({ filePath: `/media/file-${i}.mp4`, title: `Video ${i}` }));
    }
    await Promise.all(pendingInserts);

    const summary = await newScanner().scanAll();

    expect(summary.checked).toBe(total);
    expect(summary.missing).toBe(total);

    // Every row should now carry the 'missing' status.
    const flagged = await db
      .select({ status: contentItems.status })
      .from(contentItems)
      .where(eq(contentItems.status, 'missing'));
    expect(flagged.length).toBe(total);
  });
});
|
||||||
|
|
@ -16,7 +16,7 @@ export const contentItems = sqliteTable('content_items', {
|
||||||
fileSize: integer('file_size'), // bytes
|
fileSize: integer('file_size'), // bytes
|
||||||
format: text('format'), // container format e.g. 'mp4', 'webm', 'mp3'
|
format: text('format'), // container format e.g. 'mp4', 'webm', 'mp3'
|
||||||
qualityMetadata: text('quality_metadata', { mode: 'json' }), // actual quality info post-download
|
qualityMetadata: text('quality_metadata', { mode: 'json' }), // actual quality info post-download
|
||||||
status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored
|
status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored|missing
|
||||||
thumbnailUrl: text('thumbnail_url'),
|
thumbnailUrl: text('thumbnail_url'),
|
||||||
publishedAt: text('published_at'), // ISO datetime from platform (nullable)
|
publishedAt: text('published_at'), // ISO datetime from platform (nullable)
|
||||||
downloadedAt: text('downloaded_at'), // ISO datetime when download completed (nullable)
|
downloadedAt: text('downloaded_at'), // ISO datetime when download completed (nullable)
|
||||||
|
|
|
||||||
171
src/services/missing-file-scanner.ts
Normal file
171
src/services/missing-file-scanner.ts
Normal file
|
|
@ -0,0 +1,171 @@
|
||||||
|
import { eq, and, isNotNull, sql } from 'drizzle-orm';
|
||||||
|
import type { LibSQLDatabase } from 'drizzle-orm/libsql';
|
||||||
|
import type * as schema from '../db/schema/index';
|
||||||
|
import { contentItems } from '../db/schema/index';
|
||||||
|
import { systemConfig } from '../db/schema/index';
|
||||||
|
import { access } from 'node:fs/promises';
|
||||||
|
|
||||||
|
// ── Types ──
|
||||||
|
|
||||||
|
/** Summary returned by a scan and persisted as JSON in system_config. */
export interface ScanResult {
  /** Number of rows whose file path was probed. */
  checked: number;
  /** Number of probed rows whose file was not found. */
  missing: number;
  /** Wall-clock time of the scan, in milliseconds. */
  duration: number;
}
|
||||||
|
|
||||||
|
/**
 * Projection of a content_items row as selected by the scanner:
 * the primary key and a file path that the query has already filtered to non-null.
 */
interface DownloadedRow {
  /** content_items primary key; also used as the pagination cursor. */
  id: number;
  /** Path probed on disk. */
  filePath: string;
}
|
||||||
|
|
||||||
|
/** Drizzle LibSQL database handle typed against this project's schema module. */
type Db = LibSQLDatabase<typeof schema>;
|
||||||
|
|
||||||
|
// ── Constants ──
|
||||||
|
|
||||||
|
/** Maximum number of rows fetched (and then probed) per pagination step. */
const BATCH_SIZE = 100;

/** system_config key holding the ISO timestamp of the most recent scan. */
const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';

/** system_config key holding the JSON-serialized ScanResult of the most recent scan. */
const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';
|
||||||
|
|
||||||
|
// ── Scanner ──
|
||||||
|
|
||||||
|
/**
 * Detects content items marked 'downloaded' whose file is no longer on disk and
 * flips them to 'missing', recording a summary of each run in system_config.
 */
export class MissingFileScanner {
  constructor(private readonly db: Db) {}

  /**
   * Scan all content items with status='downloaded' and a non-null, non-empty filePath.
   * For each, check if the file exists on disk. If not, update status to 'missing'.
   * Works in batches of BATCH_SIZE to bound memory usage on large libraries.
   *
   * @returns Counts of probed and missing rows plus the elapsed time in milliseconds.
   *          The same summary is persisted to system_config before returning.
   */
  async scanAll(): Promise<ScanResult> {
    const start = Date.now();
    let checked = 0;
    let missing = 0;
    let lastId = 0;

    console.log('[missing-file-scanner] Scan started');

    // Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
    // during iteration, offset-based pagination would skip rows. Using id > lastId
    // ensures we always pick up the next unconsumed batch.
    while (true) {
      const batch = await this.db
        .select({ id: contentItems.id, filePath: contentItems.filePath })
        .from(contentItems)
        .where(
          and(
            eq(contentItems.status, 'downloaded'),
            isNotNull(contentItems.filePath),
            sql`${contentItems.filePath} != ''`,
            sql`${contentItems.id} > ${lastId}`
          )
        )
        .orderBy(contentItems.id)
        .limit(BATCH_SIZE);

      if (batch.length === 0) break;

      // The WHERE clause above already excludes null paths, which the column type cannot express.
      const rows = batch as DownloadedRow[];

      // Probe the whole batch concurrently; at most BATCH_SIZE probes are in flight.
      const presence = await Promise.all(rows.map((row) => fileExists(row.filePath)));

      // Walk results in row order so log output stays ordered by id.
      const missingIds: number[] = [];
      rows.forEach((row, index) => {
        if (presence[index]) return;
        missingIds.push(row.id);
        console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
      });

      checked += rows.length;
      missing += missingIds.length;

      // Batch-update missing items
      if (missingIds.length > 0) {
        await this.markMissing(missingIds);
      }

      // Advance cursor to the last processed id
      lastId = rows[rows.length - 1].id;

      // If batch was smaller than BATCH_SIZE, we've exhausted the result set
      if (batch.length < BATCH_SIZE) break;
    }

    const duration = Date.now() - start;
    const result: ScanResult = { checked, missing, duration };

    console.log(`[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`);

    // Persist scan metadata
    await this.persistScanResult(result);

    return result;
  }

  /**
   * Get the last scan result from system_config.
   *
   * @returns `null` if no scan has been run yet (no last-run row). Otherwise the
   *          stored timestamp and summary; if the summary row is absent or does not
   *          hold a valid ScanResult, a zeroed summary is returned instead.
   */
  async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
    const rows = await this.db
      .select({ key: systemConfig.key, value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));

    if (rows.length === 0) return null;

    const resultRows = await this.db
      .select({ value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));

    const emptyResult: ScanResult = { checked: 0, missing: 0, duration: 0 };

    return {
      lastRun: rows[0].value,
      result: resultRows.length > 0 ? this.parseStoredResult(resultRows[0].value, emptyResult) : emptyResult,
    };
  }

  // ── Private ──

  /**
   * Decode a persisted ScanResult, returning `fallback` when the text is not valid
   * JSON or lacks the three numeric fields. Keeps a corrupt row from making
   * getLastScanResult throw.
   */
  private parseStoredResult(raw: string, fallback: ScanResult): ScanResult {
    try {
      const parsed: unknown = JSON.parse(raw);
      if (typeof parsed === 'object' && parsed !== null) {
        const { checked, missing, duration } = parsed as Record<string, unknown>;
        if (typeof checked === 'number' && typeof missing === 'number' && typeof duration === 'number') {
          return { checked, missing, duration };
        }
      }
    } catch {
      // fall through to the warning below
    }
    console.warn('[missing-file-scanner] Stored scan result is not a valid ScanResult; using zeroed result');
    return fallback;
  }

  /**
   * Set status='missing' on the given ids, but only for rows that are still
   * 'downloaded', so a status changed since the batch was read is not overwritten.
   */
  private async markMissing(ids: number[]): Promise<void> {
    if (ids.length === 0) return;

    // SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
    const idList = sql.join(ids.map((id) => sql`${id}`), sql`, `);

    await this.db
      .update(contentItems)
      .set({
        status: 'missing',
        updatedAt: sql`(datetime('now'))`,
      })
      .where(and(eq(contentItems.status, 'downloaded'), sql`${contentItems.id} IN (${idList})`));
  }

  /** Upsert the run timestamp and the JSON-serialized summary into system_config. */
  private async persistScanResult(result: ScanResult): Promise<void> {
    const now = new Date().toISOString();
    const resultJson = JSON.stringify(result);

    // Upsert last run timestamp
    await this.db
      .insert(systemConfig)
      .values({ key: SCAN_LAST_RUN_KEY, value: now })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value: now, updatedAt: sql`(datetime('now'))` },
      });

    // Upsert last result
    await this.db
      .insert(systemConfig)
      .values({ key: SCAN_LAST_RESULT_KEY, value: resultJson })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value: resultJson, updatedAt: sql`(datetime('now'))` },
      });
  }
}
|
||||||
|
|
||||||
|
// ── Helpers ──
|
||||||
|
|
||||||
|
/** `access` error codes that positively establish the path is not there. */
const CONFIRMED_ABSENT_CODES: ReadonlySet<string> = new Set(['ENOENT', 'ENOTDIR']);

/**
 * Probe whether `filePath` is present on disk.
 *
 * Returns `false` only when the filesystem positively reports the path as absent
 * (ENOENT / ENOTDIR). Any other failure (permissions, I/O error, invalid argument)
 * is not evidence that the file is gone, so it is logged and the path is reported
 * as present; callers therefore never flag a file as missing on an inconclusive probe.
 *
 * @param filePath Path to probe.
 * @returns `true` if accessible or if absence could not be confirmed; `false` if absent.
 */
async function fileExists(filePath: string): Promise<boolean> {
  try {
    await access(filePath);
    return true;
  } catch (err: unknown) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;

    if (typeof code === 'string' && CONFIRMED_ABSENT_CODES.has(code)) {
      return false;
    }

    console.warn(
      `[missing-file-scanner] Could not verify path=${filePath} (${String(code ?? err)}); not treating it as missing`
    );
    return true;
  }
}
|
||||||
|
|
@ -21,6 +21,7 @@ export const ContentStatus = {
|
||||||
Downloaded: 'downloaded',
|
Downloaded: 'downloaded',
|
||||||
Failed: 'failed',
|
Failed: 'failed',
|
||||||
Ignored: 'ignored',
|
Ignored: 'ignored',
|
||||||
|
Missing: 'missing',
|
||||||
} as const;
|
} as const;
|
||||||
export type ContentStatus = (typeof ContentStatus)[keyof typeof ContentStatus];
|
export type ContentStatus = (typeof ContentStatus)[keyof typeof ContentStatus];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue