From 61da729fa4d65d2e06f6942e08426e1d3d4f3688 Mon Sep 17 00:00:00 2001
From: jlightner
Date: Sat, 4 Apr 2026 06:31:11 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20Add=20MissingFileScanner=20service=20wi?=
 =?UTF-8?q?th=20cursor-based=20batched=20filesyst=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- "src/services/missing-file-scanner.ts"
- "src/__tests__/missing-file-scanner.test.ts"
- "src/types/index.ts"
- "src/db/schema/content.ts"

GSD-Task: S06/T01
---
 src/__tests__/missing-file-scanner.test.ts | 207 +++++++++++++++++++++
 src/db/schema/content.ts                   |   2 +-
 src/services/missing-file-scanner.ts       | 192 +++++++++++++++++++
 src/types/index.ts                         |   1 +
 4 files changed, 401 insertions(+), 1 deletion(-)
 create mode 100644 src/__tests__/missing-file-scanner.test.ts
 create mode 100644 src/services/missing-file-scanner.ts

diff --git a/src/__tests__/missing-file-scanner.test.ts b/src/__tests__/missing-file-scanner.test.ts
new file mode 100644
index 0000000..570b6bd
--- /dev/null
+++ b/src/__tests__/missing-file-scanner.test.ts
@@ -0,0 +1,207 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { mkdtempSync, rmSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { initDatabaseAsync, closeDatabase } from '../db/index';
+import { runMigrations } from '../db/migrate';
+import { contentItems, systemConfig } from '../db/schema/index';
+import { eq } from 'drizzle-orm';
+
+// ── Mock fs/promises.access to control which files "exist" ──
+const existingFiles = new Set<string>();
+
+vi.mock('node:fs/promises', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('node:fs/promises')>();
+  return {
+    ...actual,
+    access: vi.fn(async (filePath: string) => {
+      if (!existingFiles.has(filePath)) {
+        const err = new Error(`ENOENT: no such file or directory, access '${filePath}'`) as NodeJS.ErrnoException;
+        err.code = 'ENOENT';
+        throw err;
+      }
+    }),
+  };
+});
+
+import { MissingFileScanner } from '../services/missing-file-scanner';
+
+// ── Test Helpers ──
+
+let tmpDir: string;
+let db: Awaited<ReturnType<typeof initDatabaseAsync>>;
+
+async function setupDb() {
+  tmpDir = mkdtempSync(join(tmpdir(), 'tubearr-missing-scan-'));
+  const dbPath = join(tmpDir, 'test.db');
+  db = await initDatabaseAsync(dbPath);
+  await runMigrations(dbPath);
+  return db;
+}
+
+function cleanup() {
+  closeDatabase();
+  existingFiles.clear();
+  try {
+    if (tmpDir && existsSync(tmpDir)) {
+      rmSync(tmpDir, { recursive: true, force: true });
+    }
+  } catch {
+    // best-effort
+  }
+}
+
+/** Insert a content item with the given status and filePath. */
+async function insertContent(
+  overrides: { status?: string; filePath?: string | null; title?: string } = {}
+) {
+  const result = await db
+    .insert(contentItems)
+    .values({
+      title: overrides.title ?? 'Test Video',
+      platformContentId: `plat-${Date.now()}-${Math.random()}`,
+      url: 'https://example.com/video',
+      contentType: 'video',
+      status: overrides.status ?? 'downloaded',
+      filePath: 'filePath' in overrides ? overrides.filePath : '/media/test-video.mp4',
+    })
+    .returning();
+  return result[0];
+}
+
+// ── Tests ──
+
+describe('MissingFileScanner', () => {
+  beforeEach(async () => {
+    await setupDb();
+  });
+
+  afterEach(cleanup);
+
+  it('returns zero counts when no downloaded items exist', async () => {
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(0);
+    expect(result.missing).toBe(0);
+    expect(result.duration).toBeGreaterThanOrEqual(0);
+  });
+
+  it('does not flag items whose files exist on disk', async () => {
+    const item = await insertContent({ filePath: '/media/exists.mp4' });
+    existingFiles.add('/media/exists.mp4');
+
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(1);
+    expect(result.missing).toBe(0);
+
+    // Status should remain 'downloaded'
+    const rows = await db.select().from(contentItems).where(eq(contentItems.id, item.id));
+    expect(rows[0].status).toBe('downloaded');
+  });
+
+  it('marks items as missing when file does not exist', async () => {
+    const item = await insertContent({ filePath: '/media/gone.mp4' });
+    // Don't add to existingFiles — file is "missing"
+
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(1);
+    expect(result.missing).toBe(1);
+
+    const rows = await db.select().from(contentItems).where(eq(contentItems.id, item.id));
+    expect(rows[0].status).toBe('missing');
+  });
+
+  it('skips items with non-downloaded status', async () => {
+    await insertContent({ status: 'monitored', filePath: '/media/monitored.mp4' });
+    await insertContent({ status: 'queued', filePath: '/media/queued.mp4' });
+    await insertContent({ status: 'failed', filePath: '/media/failed.mp4' });
+
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(0);
+    expect(result.missing).toBe(0);
+  });
+
+  it('skips downloaded items with null filePath', async () => {
+    await insertContent({ status: 'downloaded', filePath: null });
+
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(0);
+    expect(result.missing).toBe(0);
+  });
+
+  it('handles mixed batch of existing and missing files', async () => {
+    const items = await Promise.all([
+      insertContent({ filePath: '/media/a.mp4', title: 'A' }),
+      insertContent({ filePath: '/media/b.mp4', title: 'B' }),
+      insertContent({ filePath: '/media/c.mp4', title: 'C' }),
+    ]);
+    // Only 'a' exists
+    existingFiles.add('/media/a.mp4');
+
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(3);
+    expect(result.missing).toBe(2);
+
+    // Verify individual statuses
+    for (const item of items) {
+      const rows = await db.select().from(contentItems).where(eq(contentItems.id, item.id));
+      if (item.filePath === '/media/a.mp4') {
+        expect(rows[0].status).toBe('downloaded');
+      } else {
+        expect(rows[0].status).toBe('missing');
+      }
+    }
+  });
+
+  it('persists scan results to system_config', async () => {
+    await insertContent({ filePath: '/media/gone.mp4' });
+
+    const scanner = new MissingFileScanner(db);
+    await scanner.scanAll();
+
+    const lastScan = await scanner.getLastScanResult();
+    expect(lastScan).not.toBeNull();
+    expect(lastScan!.lastRun).toBeTruthy();
+    expect(lastScan!.result.checked).toBe(1);
+    expect(lastScan!.result.missing).toBe(1);
+    expect(lastScan!.result.duration).toBeGreaterThanOrEqual(0);
+  });
+
+  it('returns null for getLastScanResult when no scan has run', async () => {
+    const scanner = new MissingFileScanner(db);
+    const lastScan = await scanner.getLastScanResult();
+    expect(lastScan).toBeNull();
+  });
+
+  it('handles batching correctly with > BATCH_SIZE items', async () => {
+    // Insert 150 downloaded items, all missing from disk
+    const inserts = Array.from({ length: 150 }, (_, i) =>
+      insertContent({ filePath: `/media/file-${i}.mp4`, title: `Video ${i}` })
+    );
+    await Promise.all(inserts);
+
+    const scanner = new MissingFileScanner(db);
+    const result = await scanner.scanAll();
+
+    expect(result.checked).toBe(150);
+    expect(result.missing).toBe(150);
+
+    // All should be marked missing
+    const rows = await db
+      .select({ status: contentItems.status })
+      .from(contentItems)
+      .where(eq(contentItems.status, 'missing'));
+    expect(rows.length).toBe(150);
+  });
+});
diff --git a/src/db/schema/content.ts b/src/db/schema/content.ts
index 877f675..52e1a0e 100644
--- a/src/db/schema/content.ts
+++ b/src/db/schema/content.ts
@@ -16,7 +16,7 @@ export const contentItems = sqliteTable('content_items', {
   fileSize: integer('file_size'), // bytes
   format: text('format'), // container format e.g. 'mp4', 'webm', 'mp3'
   qualityMetadata: text('quality_metadata', { mode: 'json' }), // actual quality info post-download
-  status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored
+  status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored|missing
   thumbnailUrl: text('thumbnail_url'),
   publishedAt: text('published_at'), // ISO datetime from platform (nullable)
   downloadedAt: text('downloaded_at'), // ISO datetime when download completed (nullable)
diff --git a/src/services/missing-file-scanner.ts b/src/services/missing-file-scanner.ts
new file mode 100644
index 0000000..296f1f3
--- /dev/null
+++ b/src/services/missing-file-scanner.ts
@@ -0,0 +1,192 @@
+import { eq, and, isNotNull, sql } from 'drizzle-orm';
+import type { LibSQLDatabase } from 'drizzle-orm/libsql';
+import type * as schema from '../db/schema/index';
+import { contentItems } from '../db/schema/index';
+import { systemConfig } from '../db/schema/index';
+import { access } from 'node:fs/promises';
+
+// ── Types ──
+
+/** Summary of a single scanAll() run. */
+export interface ScanResult {
+  checked: number;
+  missing: number;
+  duration: number; // milliseconds
+}
+
+/** Row shape the scan loop works with: an id plus a non-null filePath. */
+interface DownloadedRow {
+  id: number;
+  filePath: string;
+}
+
+type Db = LibSQLDatabase<typeof schema>;
+
+// ── Constants ──
+
+const BATCH_SIZE = 100;
+const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';
+const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';
+
+// ── Scanner ──
+
+/**
+ * Finds content items marked 'downloaded' whose file is no longer on disk,
+ * flags them as 'missing', and records a summary of each scan in system_config.
+ */
+export class MissingFileScanner {
+  constructor(private readonly db: Db) {}
+
+  /**
+   * Scan all content items with status='downloaded' and a non-null filePath.
+   * For each, check if the file exists on disk. If not, update status to 'missing'.
+   * Works in batches of BATCH_SIZE to bound memory usage on large libraries.
+   */
+  async scanAll(): Promise<ScanResult> {
+    const start = Date.now();
+    let checked = 0;
+    let missing = 0;
+    let lastId = 0;
+
+    console.log('[missing-file-scanner] Scan started');
+
+    // Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
+    // during iteration, offset-based pagination would skip rows. Using id > lastId
+    // ensures we always pick up the next unconsumed batch.
+    while (true) {
+      const batch = await this.db
+        .select({ id: contentItems.id, filePath: contentItems.filePath })
+        .from(contentItems)
+        .where(
+          and(
+            eq(contentItems.status, 'downloaded'),
+            isNotNull(contentItems.filePath),
+            sql`${contentItems.filePath} != ''`,
+            sql`${contentItems.id} > ${lastId}`
+          )
+        )
+        .orderBy(contentItems.id)
+        .limit(BATCH_SIZE);
+
+      if (batch.length === 0) break;
+
+      const missingIds: number[] = [];
+
+      for (const row of batch as DownloadedRow[]) {
+        checked++;
+        const exists = await fileExists(row.filePath);
+        if (!exists) {
+          missingIds.push(row.id);
+          missing++;
+          console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
+        }
+      }
+
+      // Batch-update missing items
+      if (missingIds.length > 0) {
+        await this.markMissing(missingIds);
+      }
+
+      // Advance cursor to the last processed id
+      lastId = batch[batch.length - 1].id as number;
+
+      // If batch was smaller than BATCH_SIZE, we've exhausted the result set
+      if (batch.length < BATCH_SIZE) break;
+    }
+
+    const duration = Date.now() - start;
+    const result: ScanResult = { checked, missing, duration };
+
+    console.log(`[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`);
+
+    // Persist scan metadata
+    await this.persistScanResult(result);
+
+    return result;
+  }
+
+  /**
+   * Get the last scan result from system_config.
+   * Returns null if no scan has been run yet.
+   */
+  async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
+    const rows = await this.db
+      .select({ key: systemConfig.key, value: systemConfig.value })
+      .from(systemConfig)
+      .where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));
+
+    if (rows.length === 0) return null;
+
+    const resultRows = await this.db
+      .select({ value: systemConfig.value })
+      .from(systemConfig)
+      .where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));
+
+    return {
+      lastRun: rows[0].value,
+      result: resultRows.length > 0 ? JSON.parse(resultRows[0].value) : { checked: 0, missing: 0, duration: 0 },
+    };
+  }
+
+  // ── Private ──
+
+  /** Set status='missing' and refresh updatedAt for the given content item ids. */
+  private async markMissing(ids: number[]): Promise<void> {
+    // SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
+    await this.db
+      .update(contentItems)
+      .set({
+        status: 'missing',
+        updatedAt: sql`(datetime('now'))`,
+      })
+      .where(sql`${contentItems.id} IN (${sql.join(ids.map(id => sql`${id}`), sql`, `)})`);
+  }
+
+  /** Upsert the run timestamp and the JSON-encoded result into system_config. */
+  private async persistScanResult(result: ScanResult): Promise<void> {
+    const now = new Date().toISOString();
+    const resultJson = JSON.stringify(result);
+
+    // Upsert last run timestamp
+    await this.db
+      .insert(systemConfig)
+      .values({ key: SCAN_LAST_RUN_KEY, value: now })
+      .onConflictDoUpdate({
+        target: systemConfig.key,
+        set: { value: now, updatedAt: sql`(datetime('now'))` },
+      });
+
+    // Upsert last result
+    await this.db
+      .insert(systemConfig)
+      .values({ key: SCAN_LAST_RESULT_KEY, value: resultJson })
+      .onConflictDoUpdate({
+        target: systemConfig.key,
+        set: { value: resultJson, updatedAt: sql`(datetime('now'))` },
+      });
+  }
+}
+
+// ── Helpers ──
+
+/**
+ * Report whether `filePath` can be found on disk.
+ *
+ * Only a definitive "not found" (ENOENT or ENOTDIR) counts as missing. Any other
+ * access failure (e.g. EACCES, EIO) is logged and treated as present, so a
+ * permission or I/O problem never flips an item to 'missing'.
+ */
+async function fileExists(filePath: string): Promise<boolean> {
+  try {
+    await access(filePath);
+    return true;
+  } catch (err: unknown) {
+    const code =
+      typeof err === 'object' && err !== null && 'code' in err ? err.code : undefined;
+    if (code === 'ENOENT' || code === 'ENOTDIR') return false;
+    console.warn(
+      `[missing-file-scanner] Could not verify file: path=${filePath} code=${String(code)}`
+    );
+    return true;
+  }
+}
diff --git a/src/types/index.ts b/src/types/index.ts
index b619bca..06581f5 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -21,6 +21,7 @@ export const ContentStatus = {
   Downloaded: 'downloaded',
   Failed: 'failed',
   Ignored: 'ignored',
+  Missing: 'missing',
 } as const;
 
 export type ContentStatus = (typeof ContentStatus)[keyof typeof ContentStatus];