tubearr/src/services/missing-file-scanner.ts
jlightner 61da729fa4 feat: Add MissingFileScanner service with cursor-based batched filesyst…
- "src/services/missing-file-scanner.ts"
- "src/__tests__/missing-file-scanner.test.ts"
- "src/types/index.ts"
- "src/db/schema/content.ts"

GSD-Task: S06/T01
2026-04-04 06:31:11 +00:00

171 lines
5 KiB
TypeScript

import { access } from 'node:fs/promises';
import { and, eq, inArray, isNotNull, sql } from 'drizzle-orm';
import type { LibSQLDatabase } from 'drizzle-orm/libsql';
import type * as schema from '../db/schema/index';
import { contentItems, systemConfig } from '../db/schema/index';
// ── Types ──

/** Summary statistics for one full missing-file scan. */
export interface ScanResult {
  /** Number of rows whose file path was probed on disk. */
  checked: number;
  /** Number of probed rows whose file was absent. */
  missing: number;
  duration: number; // milliseconds
}

/** Shape of a row selected for checking: primary key plus its on-disk path. */
interface DownloadedRow {
  id: number;
  filePath: string;
}

/** Drizzle libSQL database handle typed against the application schema. */
type Db = LibSQLDatabase<typeof schema>;

// ── Constants ──
/** Rows fetched per query; bounds memory use and the IN-clause size in updates. */
const BATCH_SIZE = 100;
/** system_config key holding the ISO timestamp of the most recent scan. */
const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';
/** system_config key holding the JSON-serialized ScanResult of that scan. */
const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';
// ── Scanner ──
export class MissingFileScanner {
constructor(private readonly db: Db) {}
/**
* Scan all content items with status='downloaded' and a non-null filePath.
* For each, check if the file exists on disk. If not, update status to 'missing'.
* Works in batches of BATCH_SIZE to bound memory usage on large libraries.
*/
async scanAll(): Promise<ScanResult> {
const start = Date.now();
let checked = 0;
let missing = 0;
let lastId = 0;
console.log('[missing-file-scanner] Scan started');
// Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
// during iteration, offset-based pagination would skip rows. Using id > lastId
// ensures we always pick up the next unconsumed batch.
while (true) {
const batch = await this.db
.select({ id: contentItems.id, filePath: contentItems.filePath })
.from(contentItems)
.where(
and(
eq(contentItems.status, 'downloaded'),
isNotNull(contentItems.filePath),
sql`${contentItems.filePath} != ''`,
sql`${contentItems.id} > ${lastId}`
)
)
.orderBy(contentItems.id)
.limit(BATCH_SIZE);
if (batch.length === 0) break;
const missingIds: number[] = [];
for (const row of batch as DownloadedRow[]) {
checked++;
const exists = await fileExists(row.filePath);
if (!exists) {
missingIds.push(row.id);
missing++;
console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
}
}
// Batch-update missing items
if (missingIds.length > 0) {
await this.markMissing(missingIds);
}
// Advance cursor to the last processed id
lastId = batch[batch.length - 1].id as number;
// If batch was smaller than BATCH_SIZE, we've exhausted the result set
if (batch.length < BATCH_SIZE) break;
}
const duration = Date.now() - start;
const result: ScanResult = { checked, missing, duration };
console.log(`[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`);
// Persist scan metadata
await this.persistScanResult(result);
return result;
}
/**
* Get the last scan result from system_config.
* Returns null if no scan has been run yet.
*/
async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
const rows = await this.db
.select({ key: systemConfig.key, value: systemConfig.value })
.from(systemConfig)
.where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));
if (rows.length === 0) return null;
const resultRows = await this.db
.select({ value: systemConfig.value })
.from(systemConfig)
.where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));
return {
lastRun: rows[0].value,
result: resultRows.length > 0 ? JSON.parse(resultRows[0].value) : { checked: 0, missing: 0, duration: 0 },
};
}
// ── Private ──
private async markMissing(ids: number[]): Promise<void> {
// SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
await this.db
.update(contentItems)
.set({
status: 'missing',
updatedAt: sql`(datetime('now'))`,
})
.where(sql`${contentItems.id} IN (${sql.join(ids.map(id => sql`${id}`), sql`, `)})`);
}
private async persistScanResult(result: ScanResult): Promise<void> {
const now = new Date().toISOString();
const resultJson = JSON.stringify(result);
// Upsert last run timestamp
await this.db
.insert(systemConfig)
.values({ key: SCAN_LAST_RUN_KEY, value: now })
.onConflictDoUpdate({
target: systemConfig.key,
set: { value: now, updatedAt: sql`(datetime('now'))` },
});
// Upsert last result
await this.db
.insert(systemConfig)
.values({ key: SCAN_LAST_RESULT_KEY, value: resultJson })
.onConflictDoUpdate({
target: systemConfig.key,
set: { value: resultJson, updatedAt: sql`(datetime('now'))` },
});
}
}
// ── Helpers ──

/**
 * Resolve true when the given path is accessible on disk, false on any
 * access error (missing file, permission denied, etc.). Never rejects.
 */
async function fileExists(filePath: string): Promise<boolean> {
  // Two-callback then() maps success/failure straight to a boolean,
  // swallowing the rejection the same way the try/catch form would.
  return access(filePath).then(
    () => true,
    () => false
  );
}