import { eq, and, inArray, isNotNull, sql } from 'drizzle-orm';
import type { LibSQLDatabase } from 'drizzle-orm/libsql';
import type * as schema from '../db/schema/index';
import { contentItems } from '../db/schema/index';
import { systemConfig } from '../db/schema/index';
import { access } from 'node:fs/promises';

// ── Types ──

/** Summary of one scan pass, as returned by `scanAll` and persisted to system_config. */
export interface ScanResult {
  /** Number of rows whose file path was probed. */
  checked: number;
  /** Number of probed rows whose file was not found. */
  missing: number;
  /** Wall-clock time of the scan, in milliseconds. */
  duration: number;
}

/** Row shape selected by the scan query; the WHERE clause filters out null/empty paths. */
interface DownloadedRow {
  id: number;
  filePath: string;
}

type Db = LibSQLDatabase<typeof schema>;

// ── Constants ──

/** Maximum number of rows fetched (and probed) per iteration of the scan loop. */
const BATCH_SIZE = 100;
const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';
const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';

// ── Scanner ──

export class MissingFileScanner {
  constructor(private readonly db: Db) {}

  /**
   * Scan all content items with status='downloaded' and a non-null, non-empty filePath.
   * For each, check if the file exists on disk. If not, update status to 'missing'.
   * Works in batches of BATCH_SIZE to bound memory usage on large libraries.
   *
   * @returns Counts of checked/missing items and the scan duration in milliseconds.
   *          The same result is persisted to system_config before returning.
   */
  async scanAll(): Promise<ScanResult> {
    const start = Date.now();
    let checked = 0;
    let missing = 0;
    let lastId = 0;

    console.log('[missing-file-scanner] Scan started');

    // Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
    // during iteration, offset-based pagination would skip rows. Using id > lastId
    // ensures we always pick up the next unconsumed batch.
    while (true) {
      const batch = await this.db
        .select({ id: contentItems.id, filePath: contentItems.filePath })
        .from(contentItems)
        .where(
          and(
            eq(contentItems.status, 'downloaded'),
            isNotNull(contentItems.filePath),
            sql`${contentItems.filePath} != ''`,
            sql`${contentItems.id} > ${lastId}`
          )
        )
        .orderBy(contentItems.id)
        .limit(BATCH_SIZE);

      if (batch.length === 0) break;

      // The WHERE clause above excludes null/empty paths, so narrowing the row type is safe.
      const rows = batch as DownloadedRow[];

      // Probe the whole batch concurrently; results keep the batch's id order.
      const probes = await Promise.all(
        rows.map(async (row) => ({ row, exists: await fileExists(row.filePath) }))
      );

      const missingIds: number[] = [];
      for (const { row, exists } of probes) {
        checked++;
        // Rows are ordered by id, so after the loop this holds the batch's last id.
        lastId = row.id;
        if (!exists) {
          missingIds.push(row.id);
          missing++;
          console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
        }
      }

      // Batch-update missing items
      if (missingIds.length > 0) {
        await this.markMissing(missingIds);
      }

      // If batch was smaller than BATCH_SIZE, we've exhausted the result set
      if (batch.length < BATCH_SIZE) break;
    }

    const duration = Date.now() - start;
    const result: ScanResult = { checked, missing, duration };

    console.log(
      `[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`
    );

    // Persist scan metadata
    await this.persistScanResult(result);

    return result;
  }

  /**
   * Get the last scan result from system_config.
   *
   * @returns `null` if no scan has been run yet (no last-run row). Otherwise the stored
   *          timestamp plus the stored result; a missing or malformed result row yields
   *          a zeroed `ScanResult`.
   */
  async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
    const [lastRunRow] = await this.db
      .select({ key: systemConfig.key, value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));

    if (!lastRunRow) return null;

    const resultRows = await this.db
      .select({ value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));

    const rawResult = resultRows[0]?.value;

    return {
      lastRun: lastRunRow.value,
      result: rawResult == null ? emptyScanResult() : parseScanResult(rawResult),
    };
  }

  // ── Private ──

  /** Set status='missing' (and bump updatedAt) for every content item whose id is in `ids`. */
  private async markMissing(ids: number[]): Promise<void> {
    // Nothing to update; also avoids building an empty IN () list.
    if (ids.length === 0) return;

    // SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
    await this.db
      .update(contentItems)
      .set({
        status: 'missing',
        updatedAt: sql`(datetime('now'))`,
      })
      .where(inArray(contentItems.id, ids));
  }

  /** Upsert the last-run timestamp (ISO 8601) and the JSON-encoded result into system_config. */
  private async persistScanResult(result: ScanResult): Promise<void> {
    const now = new Date().toISOString();
    const resultJson = JSON.stringify(result);

    // Upsert last run timestamp
    await this.db
      .insert(systemConfig)
      .values({ key: SCAN_LAST_RUN_KEY, value: now })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value: now, updatedAt: sql`(datetime('now'))` },
      });

    // Upsert last result
    await this.db
      .insert(systemConfig)
      .values({ key: SCAN_LAST_RESULT_KEY, value: resultJson })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value: resultJson, updatedAt: sql`(datetime('now'))` },
      });
  }
}

// ── Helpers ──

/** Zeroed result used when no (valid) stored result is available. */
function emptyScanResult(): ScanResult {
  return { checked: 0, missing: 0, duration: 0 };
}

/** Runtime check that a parsed JSON value has the numeric fields of a ScanResult. */
function isScanResult(value: unknown): value is ScanResult {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.checked === 'number' &&
    typeof candidate.missing === 'number' &&
    typeof candidate.duration === 'number'
  );
}

/**
 * Decode a stored scan result. Invalid JSON or an unexpected shape is reported with a
 * warning and replaced by a zeroed result instead of throwing or leaking an untyped value.
 */
function parseScanResult(raw: string): ScanResult {
  try {
    const parsed: unknown = JSON.parse(raw);
    if (isScanResult(parsed)) return parsed;
  } catch {
    // Invalid JSON: handled by the fallback below.
  }
  console.warn('[missing-file-scanner] Stored scan result is malformed; returning zeroed result');
  return emptyScanResult();
}

/**
 * Resolve to true when `access(filePath)` succeeds, false when it rejects.
 * Any rejection is treated as "file not present"; the error itself is not inspected.
 */
async function fileExists(filePath: string): Promise<boolean> {
  try {
    await access(filePath);
    return true;
  } catch {
    return false;
  }
}