feat: Add MissingFileScanner service with cursor-based batched filesyst…

- "src/services/missing-file-scanner.ts"
- "src/__tests__/missing-file-scanner.test.ts"
- "src/types/index.ts"
- "src/db/schema/content.ts"

GSD-Task: S06/T01
This commit is contained in:
jlightner 2026-04-04 06:31:11 +00:00
parent c0ac8cadd5
commit 61da729fa4
4 changed files with 380 additions and 1 deletions

View file

@ -0,0 +1,207 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { mkdtempSync, rmSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { initDatabaseAsync, closeDatabase } from '../db/index';
import { runMigrations } from '../db/migrate';
import { contentItems, systemConfig } from '../db/schema/index';
import { eq } from 'drizzle-orm';
// ── Mock fs/promises.access to control which files "exist" ──
// Paths the mocked `access` should treat as present on disk. Tests add entries
// here; `cleanup` clears the set between tests.
const existingFiles = new Set<string>();
// Replace only `access` from node:fs/promises; every other export is passed
// through from the real module via `importOriginal`.
vi.mock('node:fs/promises', async (importOriginal) => {
const actual = await importOriginal<typeof import('node:fs/promises')>();
return {
...actual,
// Resolves (with undefined) when the path is registered in `existingFiles`;
// otherwise rejects with an Error whose `code` is 'ENOENT'.
access: vi.fn(async (filePath: string) => {
if (!existingFiles.has(filePath)) {
const err = new Error(`ENOENT: no such file or directory, access '${filePath}'`) as NodeJS.ErrnoException;
err.code = 'ENOENT';
throw err;
}
}),
};
});
import { MissingFileScanner } from '../services/missing-file-scanner';
// ── Test Helpers ──
// Scratch directory holding the per-test database file; assigned by setupDb()
// and removed by cleanup().
let tmpDir: string;
// Database handle returned by initDatabaseAsync(); reassigned by setupDb().
let db: Awaited<ReturnType<typeof initDatabaseAsync>>;
/**
 * Create a fresh scratch directory, open a database file inside it, and run
 * migrations against that file. Stores the directory and handle in the
 * module-level `tmpDir` / `db` and returns the handle.
 */
async function setupDb() {
  const scratchPrefix = join(tmpdir(), 'tubearr-missing-scan-');
  tmpDir = mkdtempSync(scratchPrefix);

  const sqliteFile = join(tmpDir, 'test.db');
  db = await initDatabaseAsync(sqliteFile);
  await runMigrations(sqliteFile);

  return db;
}
/**
 * Per-test teardown: close the database, forget all mocked "existing" files,
 * and remove the scratch directory. Directory removal is best-effort; any
 * error it raises is ignored.
 */
function cleanup() {
  closeDatabase();
  existingFiles.clear();

  try {
    const scratchDirPresent = Boolean(tmpDir) && existsSync(tmpDir);
    if (!scratchDirPresent) {
      return;
    }
    rmSync(tmpDir, { recursive: true, force: true });
  } catch {
    // best-effort
  }
}
/**
 * Insert one content item and return the inserted row.
 *
 * Defaults: title 'Test Video', status 'downloaded', filePath
 * '/media/test-video.mp4'. Passing a `filePath` key (even with `null` or
 * `undefined`) overrides the default path, so a null path can be stored.
 */
async function insertContent(
  overrides: { status?: string; filePath?: string | null; title?: string } = {}
) {
  // Key presence, not value, decides whether the default path is used.
  let storedPath: string | null | undefined = '/media/test-video.mp4';
  if ('filePath' in overrides) {
    storedPath = overrides.filePath;
  }

  const [inserted] = await db
    .insert(contentItems)
    .values({
      title: overrides.title ?? 'Test Video',
      platformContentId: `plat-${Date.now()}-${Math.random()}`,
      url: 'https://example.com/video',
      contentType: 'video',
      status: overrides.status ?? 'downloaded',
      filePath: storedPath,
    })
    .returning();

  return inserted;
}
// ── Tests ──
describe('MissingFileScanner', () => {
  /** Run a full scan using a scanner freshly bound to the current test db. */
  const runScan = () => new MissingFileScanner(db).scanAll();

  /** Read back the stored status of a single content item by id. */
  const statusOf = async (id: number) => {
    const found = await db.select().from(contentItems).where(eq(contentItems.id, id));
    return found[0].status;
  };

  beforeEach(async () => {
    await setupDb();
  });

  afterEach(cleanup);

  it('returns zero counts when no downloaded items exist', async () => {
    const { checked, missing, duration } = await runScan();

    expect(checked).toBe(0);
    expect(missing).toBe(0);
    expect(duration).toBeGreaterThanOrEqual(0);
  });

  it('does not flag items whose files exist on disk', async () => {
    const present = await insertContent({ filePath: '/media/exists.mp4' });
    existingFiles.add('/media/exists.mp4');

    const { checked, missing } = await runScan();

    expect(checked).toBe(1);
    expect(missing).toBe(0);
    // The row must still be 'downloaded'.
    expect(await statusOf(present.id)).toBe('downloaded');
  });

  it('marks items as missing when file does not exist', async () => {
    // The path is never registered in existingFiles, so the mock reports ENOENT.
    const vanished = await insertContent({ filePath: '/media/gone.mp4' });

    const { checked, missing } = await runScan();

    expect(checked).toBe(1);
    expect(missing).toBe(1);
    expect(await statusOf(vanished.id)).toBe('missing');
  });

  it('skips items with non-downloaded status', async () => {
    const nonDownloaded = [
      { status: 'monitored', filePath: '/media/monitored.mp4' },
      { status: 'queued', filePath: '/media/queued.mp4' },
      { status: 'failed', filePath: '/media/failed.mp4' },
    ];
    for (const spec of nonDownloaded) {
      await insertContent(spec);
    }

    const { checked, missing } = await runScan();

    expect(checked).toBe(0);
    expect(missing).toBe(0);
  });

  it('skips downloaded items with null filePath', async () => {
    await insertContent({ status: 'downloaded', filePath: null });

    const { checked, missing } = await runScan();

    expect(checked).toBe(0);
    expect(missing).toBe(0);
  });

  it('handles mixed batch of existing and missing files', async () => {
    const inserted = await Promise.all([
      insertContent({ filePath: '/media/a.mp4', title: 'A' }),
      insertContent({ filePath: '/media/b.mp4', title: 'B' }),
      insertContent({ filePath: '/media/c.mp4', title: 'C' }),
    ]);
    // Of the three, only 'a' is present on the mocked disk.
    existingFiles.add('/media/a.mp4');

    const { checked, missing } = await runScan();

    expect(checked).toBe(3);
    expect(missing).toBe(2);

    // Each row ends up with the status matching its file's presence.
    for (const entry of inserted) {
      const expectedStatus = entry.filePath === '/media/a.mp4' ? 'downloaded' : 'missing';
      expect(await statusOf(entry.id)).toBe(expectedStatus);
    }
  });

  it('persists scan results to system_config', async () => {
    await insertContent({ filePath: '/media/gone.mp4' });
    const scanner = new MissingFileScanner(db);

    await scanner.scanAll();
    const lastScan = await scanner.getLastScanResult();

    expect(lastScan).not.toBeNull();
    const { lastRun, result } = lastScan!;
    expect(lastRun).toBeTruthy();
    expect(result.checked).toBe(1);
    expect(result.missing).toBe(1);
    expect(result.duration).toBeGreaterThanOrEqual(0);
  });

  it('returns null for getLastScanResult when no scan has run', async () => {
    const lastScan = await new MissingFileScanner(db).getLastScanResult();

    expect(lastScan).toBeNull();
  });

  it('handles batching correctly with > BATCH_SIZE items', async () => {
    // 150 downloaded rows, none of which exist on the mocked disk.
    await Promise.all(
      Array.from({ length: 150 }, (_, n) =>
        insertContent({ filePath: `/media/file-${n}.mp4`, title: `Video ${n}` })
      )
    );

    const { checked, missing } = await runScan();

    expect(checked).toBe(150);
    expect(missing).toBe(150);

    // Every row should now carry the 'missing' status.
    const flagged = await db
      .select({ status: contentItems.status })
      .from(contentItems)
      .where(eq(contentItems.status, 'missing'));
    expect(flagged.length).toBe(150);
  });
});

View file

@ -16,7 +16,7 @@ export const contentItems = sqliteTable('content_items', {
fileSize: integer('file_size'), // bytes
format: text('format'), // container format e.g. 'mp4', 'webm', 'mp3'
qualityMetadata: text('quality_metadata', { mode: 'json' }), // actual quality info post-download
status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored
status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored|missing
thumbnailUrl: text('thumbnail_url'),
publishedAt: text('published_at'), // ISO datetime from platform (nullable)
downloadedAt: text('downloaded_at'), // ISO datetime when download completed (nullable)

View file

@ -0,0 +1,171 @@
import { eq, and, isNotNull, sql } from 'drizzle-orm';
import type { LibSQLDatabase } from 'drizzle-orm/libsql';
import type * as schema from '../db/schema/index';
import { contentItems } from '../db/schema/index';
import { systemConfig } from '../db/schema/index';
import { access } from 'node:fs/promises';
// ── Types ──
/** Summary of one scan, returned by `scanAll()` and stored as JSON in system_config. */
export interface ScanResult {
/** Number of rows whose file path was checked. */
checked: number;
/** Number of checked rows whose file was not found. */
missing: number;
duration: number; // milliseconds
}
/** Row shape the scan loop works with: the item id plus a non-null file path. */
interface DownloadedRow {
id: number;
filePath: string;
}
/** Drizzle LibSQL database handle typed against this project's schema module. */
type Db = LibSQLDatabase<typeof schema>;
// ── Constants ──
// Maximum number of rows fetched per query by the scan loop.
const BATCH_SIZE = 100;
// system_config key holding the ISO timestamp of the most recent scan.
const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';
// system_config key holding the JSON-serialized ScanResult of the most recent scan.
const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';
// ── Scanner ──
/**
 * Finds content items recorded as 'downloaded' whose file is no longer on disk
 * and flips their status to 'missing'. The outcome of each scan is persisted
 * to system_config so it can be read back with `getLastScanResult()`.
 */
export class MissingFileScanner {
  constructor(private readonly db: Db) {}

  /**
   * Scan all content items with status='downloaded' and a non-empty filePath.
   * For each, check if the file exists on disk. If not, update status to 'missing'.
   * Works in batches of BATCH_SIZE to bound memory usage on large libraries.
   *
   * @returns counts of checked/missing items and the elapsed time in milliseconds.
   */
  async scanAll(): Promise<ScanResult> {
    const start = Date.now();
    let checked = 0;
    let missing = 0;
    let lastId = 0;

    console.log('[missing-file-scanner] Scan started');

    // Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
    // during iteration, offset-based pagination would skip rows. Using id > lastId
    // ensures we always pick up the next unconsumed batch.
    while (true) {
      const batch = await this.db
        .select({ id: contentItems.id, filePath: contentItems.filePath })
        .from(contentItems)
        .where(
          and(
            eq(contentItems.status, 'downloaded'),
            isNotNull(contentItems.filePath),
            sql`${contentItems.filePath} != ''`,
            sql`${contentItems.id} > ${lastId}`
          )
        )
        .orderBy(contentItems.id)
        .limit(BATCH_SIZE);

      const lastRow = batch[batch.length - 1];
      if (lastRow === undefined) break; // empty batch: nothing left to scan

      // The query already excludes null/empty paths; narrowing here makes that
      // guarantee visible to the type checker instead of asserting it.
      const rows: DownloadedRow[] = batch.flatMap((row) =>
        row.filePath ? [{ id: row.id, filePath: row.filePath }] : []
      );

      // Existence checks within a batch are independent, so run them together.
      // Results come back in row order, which keeps log output ordered by id.
      const existence = await Promise.all(rows.map((row) => fileExists(row.filePath)));

      const missingIds: number[] = [];
      rows.forEach((row, index) => {
        checked++;
        if (!existence[index]) {
          missingIds.push(row.id);
          missing++;
          console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
        }
      });

      // Batch-update missing items
      if (missingIds.length > 0) {
        await this.markMissing(missingIds);
      }

      // Advance cursor to the last processed id
      lastId = lastRow.id;

      // If batch was smaller than BATCH_SIZE, we've exhausted the result set
      if (batch.length < BATCH_SIZE) break;
    }

    const duration = Date.now() - start;
    const result: ScanResult = { checked, missing, duration };

    console.log(`[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`);

    // Persist scan metadata
    await this.persistScanResult(result);

    return result;
  }

  /**
   * Get the last scan result from system_config.
   * Returns null if no scan has been run yet. If the timestamp exists but the
   * stored result is absent or not a valid ScanResult, zeroed counts are returned.
   */
  async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
    const rows = await this.db
      .select({ key: systemConfig.key, value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));

    const lastRunRow = rows[0];
    if (lastRunRow === undefined) return null;

    const resultRows = await this.db
      .select({ value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));

    const resultRow = resultRows[0];
    return {
      lastRun: lastRunRow.value,
      result:
        resultRow !== undefined
          ? MissingFileScanner.parseScanResult(resultRow.value)
          : { checked: 0, missing: 0, duration: 0 },
    };
  }

  // ── Private ──

  /**
   * Decode a stored ScanResult. Stored config is external input as far as this
   * class is concerned, so anything that is not valid JSON with three numeric
   * fields yields zeroed counts (with a warning) instead of throwing.
   */
  private static parseScanResult(raw: string): ScanResult {
    const empty: ScanResult = { checked: 0, missing: 0, duration: 0 };

    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      console.warn('[missing-file-scanner] Stored scan result is not valid JSON; ignoring it');
      return empty;
    }

    if (typeof parsed !== 'object' || parsed === null) {
      console.warn('[missing-file-scanner] Stored scan result has unexpected shape; ignoring it');
      return empty;
    }

    const { checked, missing, duration } = parsed as Record<string, unknown>;
    if (typeof checked !== 'number' || typeof missing !== 'number' || typeof duration !== 'number') {
      console.warn('[missing-file-scanner] Stored scan result has unexpected shape; ignoring it');
      return empty;
    }

    return { checked, missing, duration };
  }

  /**
   * Set status='missing' on the given ids, but only for rows that are still
   * 'downloaded'. The status guard keeps a row whose status changed after it
   * was selected (for example, re-queued) from being overwritten.
   */
  private async markMissing(ids: number[]): Promise<void> {
    if (ids.length === 0) return;

    // SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
    await this.db
      .update(contentItems)
      .set({
        status: 'missing',
        updatedAt: sql`(datetime('now'))`,
      })
      .where(
        and(
          eq(contentItems.status, 'downloaded'),
          sql`${contentItems.id} IN (${sql.join(ids.map((id) => sql`${id}`), sql`, `)})`
        )
      );
  }

  /** Upsert the scan timestamp and the JSON-serialized result into system_config. */
  private async persistScanResult(result: ScanResult): Promise<void> {
    const now = new Date().toISOString();
    const resultJson = JSON.stringify(result);

    // Upsert last run timestamp
    await this.db
      .insert(systemConfig)
      .values({ key: SCAN_LAST_RUN_KEY, value: now })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value: now, updatedAt: sql`(datetime('now'))` },
      });

    // Upsert last result
    await this.db
      .insert(systemConfig)
      .values({ key: SCAN_LAST_RESULT_KEY, value: resultJson })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value: resultJson, updatedAt: sql`(datetime('now'))` },
      });
  }
}
// ── Helpers ──
/**
 * Report whether `filePath` is present on disk.
 *
 * Returns `false` only when the filesystem confirms the path is absent
 * (`ENOENT`, or `ENOTDIR` when a parent component is not a directory).
 * Any other failure from `access` (permission denied, I/O error, ...) means
 * presence could not be determined; those are logged and reported as `true`
 * so the caller does not flag an item as missing on inconclusive evidence.
 *
 * @param filePath path to probe.
 * @returns `false` if the file is confirmed absent, `true` otherwise. Never rejects.
 */
async function fileExists(filePath: string): Promise<boolean> {
  try {
    await access(filePath);
    return true;
  } catch (err: unknown) {
    // Read `code` structurally rather than via instanceof so errors created in
    // another realm (or by a test double) are still recognized.
    const code = (err as { code?: unknown } | null | undefined)?.code;
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return false;
    }

    const detail = err instanceof Error ? err.message : String(err);
    console.warn(
      `[missing-file-scanner] Could not verify file, leaving status unchanged: path=${filePath} error=${detail}`
    );
    return true;
  }
}

View file

@ -21,6 +21,7 @@ export const ContentStatus = {
Downloaded: 'downloaded',
Failed: 'failed',
Ignored: 'ignored',
Missing: 'missing',
} as const;
export type ContentStatus = (typeof ContentStatus)[keyof typeof ContentStatus];