// src/services/missing-file-scanner.ts — GSD-Task: S06/T01
// Related files: src/__tests__/missing-file-scanner.test.ts, src/types/index.ts, src/db/schema/content.ts
import { eq, and, isNotNull, sql } from 'drizzle-orm';
|
|
import type { LibSQLDatabase } from 'drizzle-orm/libsql';
|
|
import type * as schema from '../db/schema/index';
|
|
import { contentItems } from '../db/schema/index';
|
|
import { systemConfig } from '../db/schema/index';
|
|
import { access } from 'node:fs/promises';
|
|
|
|
// ── Types ──
|
|
|
|
/** Summary of one completed missing-file scan. */
export interface ScanResult {
  /** Number of content items whose file path was checked on disk. */
  checked: number;
  /** Number of checked items whose file was absent and marked 'missing'. */
  missing: number;
  /** Total scan wall-clock time, in milliseconds. */
  duration: number; // milliseconds
}
|
|
|
|
/** Minimal projection of a content_items row needed for the disk check. */
interface DownloadedRow {
  /** Primary key; also serves as the pagination cursor in scanAll. */
  id: number;
  /** Path checked for existence on disk (rows with null/empty paths are filtered out). */
  filePath: string;
}

/** Drizzle libSQL database handle typed with the full application schema. */
type Db = LibSQLDatabase<typeof schema>;
|
|
|
|
// ── Constants ──
|
|
|
|
/** Rows fetched per query, to bound memory usage on large libraries. */
const BATCH_SIZE = 100;
/** system_config key holding the ISO timestamp of the last scan run. */
const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';
/** system_config key holding the last ScanResult serialized as JSON. */
const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';
|
|
|
|
// ── Scanner ──
|
|
|
|
export class MissingFileScanner {
|
|
constructor(private readonly db: Db) {}
|
|
|
|
/**
|
|
* Scan all content items with status='downloaded' and a non-null filePath.
|
|
* For each, check if the file exists on disk. If not, update status to 'missing'.
|
|
* Works in batches of BATCH_SIZE to bound memory usage on large libraries.
|
|
*/
|
|
async scanAll(): Promise<ScanResult> {
|
|
const start = Date.now();
|
|
let checked = 0;
|
|
let missing = 0;
|
|
let lastId = 0;
|
|
|
|
console.log('[missing-file-scanner] Scan started');
|
|
|
|
// Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
|
|
// during iteration, offset-based pagination would skip rows. Using id > lastId
|
|
// ensures we always pick up the next unconsumed batch.
|
|
while (true) {
|
|
const batch = await this.db
|
|
.select({ id: contentItems.id, filePath: contentItems.filePath })
|
|
.from(contentItems)
|
|
.where(
|
|
and(
|
|
eq(contentItems.status, 'downloaded'),
|
|
isNotNull(contentItems.filePath),
|
|
sql`${contentItems.filePath} != ''`,
|
|
sql`${contentItems.id} > ${lastId}`
|
|
)
|
|
)
|
|
.orderBy(contentItems.id)
|
|
.limit(BATCH_SIZE);
|
|
|
|
if (batch.length === 0) break;
|
|
|
|
const missingIds: number[] = [];
|
|
|
|
for (const row of batch as DownloadedRow[]) {
|
|
checked++;
|
|
const exists = await fileExists(row.filePath);
|
|
if (!exists) {
|
|
missingIds.push(row.id);
|
|
missing++;
|
|
console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
|
|
}
|
|
}
|
|
|
|
// Batch-update missing items
|
|
if (missingIds.length > 0) {
|
|
await this.markMissing(missingIds);
|
|
}
|
|
|
|
// Advance cursor to the last processed id
|
|
lastId = batch[batch.length - 1].id as number;
|
|
|
|
// If batch was smaller than BATCH_SIZE, we've exhausted the result set
|
|
if (batch.length < BATCH_SIZE) break;
|
|
}
|
|
|
|
const duration = Date.now() - start;
|
|
const result: ScanResult = { checked, missing, duration };
|
|
|
|
console.log(`[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`);
|
|
|
|
// Persist scan metadata
|
|
await this.persistScanResult(result);
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Get the last scan result from system_config.
|
|
* Returns null if no scan has been run yet.
|
|
*/
|
|
async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
|
|
const rows = await this.db
|
|
.select({ key: systemConfig.key, value: systemConfig.value })
|
|
.from(systemConfig)
|
|
.where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));
|
|
|
|
if (rows.length === 0) return null;
|
|
|
|
const resultRows = await this.db
|
|
.select({ value: systemConfig.value })
|
|
.from(systemConfig)
|
|
.where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));
|
|
|
|
return {
|
|
lastRun: rows[0].value,
|
|
result: resultRows.length > 0 ? JSON.parse(resultRows[0].value) : { checked: 0, missing: 0, duration: 0 },
|
|
};
|
|
}
|
|
|
|
// ── Private ──
|
|
|
|
private async markMissing(ids: number[]): Promise<void> {
|
|
// SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
|
|
await this.db
|
|
.update(contentItems)
|
|
.set({
|
|
status: 'missing',
|
|
updatedAt: sql`(datetime('now'))`,
|
|
})
|
|
.where(sql`${contentItems.id} IN (${sql.join(ids.map(id => sql`${id}`), sql`, `)})`);
|
|
}
|
|
|
|
private async persistScanResult(result: ScanResult): Promise<void> {
|
|
const now = new Date().toISOString();
|
|
const resultJson = JSON.stringify(result);
|
|
|
|
// Upsert last run timestamp
|
|
await this.db
|
|
.insert(systemConfig)
|
|
.values({ key: SCAN_LAST_RUN_KEY, value: now })
|
|
.onConflictDoUpdate({
|
|
target: systemConfig.key,
|
|
set: { value: now, updatedAt: sql`(datetime('now'))` },
|
|
});
|
|
|
|
// Upsert last result
|
|
await this.db
|
|
.insert(systemConfig)
|
|
.values({ key: SCAN_LAST_RESULT_KEY, value: resultJson })
|
|
.onConflictDoUpdate({
|
|
target: systemConfig.key,
|
|
set: { value: resultJson, updatedAt: sql`(datetime('now'))` },
|
|
});
|
|
}
|
|
}
|
|
|
|
// ── Helpers ──
|
|
|
|
async function fileExists(filePath: string): Promise<boolean> {
|
|
try {
|
|
await access(filePath);
|
|
return true;
|
|
} catch {
|
|
return false;
|
|
}
|
|
}
|