feat: Add MissingFileScanner service with cursor-based batched filesyst…
- "src/services/missing-file-scanner.ts" - "src/__tests__/missing-file-scanner.test.ts" - "src/types/index.ts" - "src/db/schema/content.ts" GSD-Task: S06/T01
This commit is contained in:
parent
c0ac8cadd5
commit
61da729fa4
4 changed files with 380 additions and 1 deletions
207
src/__tests__/missing-file-scanner.test.ts
Normal file
207
src/__tests__/missing-file-scanner.test.ts
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { mkdtempSync, rmSync, existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { initDatabaseAsync, closeDatabase } from '../db/index';
|
||||
import { runMigrations } from '../db/migrate';
|
||||
import { contentItems, systemConfig } from '../db/schema/index';
|
||||
import { eq } from 'drizzle-orm';
|
||||
|
||||
// ── Mock fs/promises.access to control which files "exist" ──
|
||||
/** Paths the mocked `access` treats as present on disk; tests add entries to simulate existing files. */
const existingFiles = new Set<string>();

// Only `access` is replaced; every other fs/promises export keeps its real implementation.
vi.mock('node:fs/promises', async (importOriginal) => {
  const realFs = await importOriginal<typeof import('node:fs/promises')>();

  /** Resolves for registered paths, rejects with an ENOENT-coded error for anything else. */
  const fakeAccess = vi.fn(async (filePath: string) => {
    if (existingFiles.has(filePath)) return;

    const notFound = new Error(
      `ENOENT: no such file or directory, access '${filePath}'`,
    ) as NodeJS.ErrnoException;
    notFound.code = 'ENOENT';
    throw notFound;
  });

  return { ...realFs, access: fakeAccess };
});
|
||||
|
||||
import { MissingFileScanner } from '../services/missing-file-scanner';
|
||||
|
||||
// ── Test Helpers ──
|
||||
|
||||
/** Temporary directory that holds the SQLite file for the current test. */
let tmpDir: string;
/** Database handle opened by `setupDb` for the current test. */
let db: Awaited<ReturnType<typeof initDatabaseAsync>>;

/**
 * Create a fresh temp directory, open a database file inside it and run the migrations.
 * @returns the opened database handle (also stored in the module-level `db`).
 */
async function setupDb() {
  tmpDir = mkdtempSync(join(tmpdir(), 'tubearr-missing-scan-'));
  const databaseFile = join(tmpDir, 'test.db');

  db = await initDatabaseAsync(databaseFile);
  await runMigrations(databaseFile);

  return db;
}
|
||||
|
||||
/**
 * Per-test teardown: close the database, forget all simulated files and
 * remove the temp directory. Directory removal is best-effort.
 */
function cleanup() {
  closeDatabase();
  existingFiles.clear();

  if (!tmpDir) return;

  try {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  } catch {
    // best-effort
  }
}
|
||||
|
||||
/**
 * Insert one content item and return the stored row.
 *
 * Defaults: title 'Test Video', status 'downloaded', filePath '/media/test-video.mp4'.
 * Passing `filePath: null` explicitly stores null (the default only applies when the
 * key is absent from `overrides`).
 */
async function insertContent(
  overrides: { status?: string; filePath?: string | null; title?: string } = {}
) {
  // `in` rather than `??` so an explicit null is preserved.
  const filePath = 'filePath' in overrides ? overrides.filePath : '/media/test-video.mp4';

  const [inserted] = await db
    .insert(contentItems)
    .values({
      title: overrides.title ?? 'Test Video',
      platformContentId: `plat-${Date.now()}-${Math.random()}`,
      url: 'https://example.com/video',
      contentType: 'video',
      status: overrides.status ?? 'downloaded',
      filePath,
    })
    .returning();

  return inserted;
}
|
||||
|
||||
// ── Tests ──
|
||||
|
||||
describe('MissingFileScanner', () => {
  beforeEach(async () => {
    await setupDb();
  });

  afterEach(cleanup);

  /** Read the persisted status of a single content item. */
  async function statusOf(id: number) {
    const rows = await db.select().from(contentItems).where(eq(contentItems.id, id));
    return rows[0].status;
  }

  it('returns zero counts when no downloaded items exist', async () => {
    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(0);
    expect(result.missing).toBe(0);
    expect(result.duration).toBeGreaterThanOrEqual(0);
  });

  it('does not flag items whose files exist on disk', async () => {
    const item = await insertContent({ filePath: '/media/exists.mp4' });
    existingFiles.add('/media/exists.mp4');

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(1);
    expect(result.missing).toBe(0);

    // Status should remain 'downloaded'
    expect(await statusOf(item.id)).toBe('downloaded');
  });

  it('marks items as missing when file does not exist', async () => {
    const item = await insertContent({ filePath: '/media/gone.mp4' });
    // Don't add to existingFiles — file is "missing"

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(1);
    expect(result.missing).toBe(1);
    expect(await statusOf(item.id)).toBe('missing');
  });

  it('skips items with non-downloaded status', async () => {
    await insertContent({ status: 'monitored', filePath: '/media/monitored.mp4' });
    await insertContent({ status: 'queued', filePath: '/media/queued.mp4' });
    await insertContent({ status: 'failed', filePath: '/media/failed.mp4' });

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(0);
    expect(result.missing).toBe(0);
  });

  it('skips downloaded items with null filePath', async () => {
    await insertContent({ status: 'downloaded', filePath: null });

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(0);
    expect(result.missing).toBe(0);
  });

  // The scanner's query also excludes empty-string paths; make sure such rows
  // are neither counted nor flipped to 'missing'.
  it('skips downloaded items with empty filePath', async () => {
    const item = await insertContent({ status: 'downloaded', filePath: '' });

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(0);
    expect(result.missing).toBe(0);
    expect(await statusOf(item.id)).toBe('downloaded');
  });

  it('handles mixed batch of existing and missing files', async () => {
    const items = await Promise.all([
      insertContent({ filePath: '/media/a.mp4', title: 'A' }),
      insertContent({ filePath: '/media/b.mp4', title: 'B' }),
      insertContent({ filePath: '/media/c.mp4', title: 'C' }),
    ]);
    // Only 'a' exists
    existingFiles.add('/media/a.mp4');

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(3);
    expect(result.missing).toBe(2);

    // Verify individual statuses
    for (const item of items) {
      const expected = item.filePath === '/media/a.mp4' ? 'downloaded' : 'missing';
      expect(await statusOf(item.id)).toBe(expected);
    }
  });

  it('persists scan results to system_config', async () => {
    await insertContent({ filePath: '/media/gone.mp4' });

    const scanner = new MissingFileScanner(db);
    await scanner.scanAll();

    const lastScan = await scanner.getLastScanResult();
    expect(lastScan).not.toBeNull();
    expect(lastScan!.lastRun).toBeTruthy();
    expect(lastScan!.result.checked).toBe(1);
    expect(lastScan!.result.missing).toBe(1);
    expect(lastScan!.result.duration).toBeGreaterThanOrEqual(0);
  });

  it('returns null for getLastScanResult when no scan has run', async () => {
    const scanner = new MissingFileScanner(db);
    const lastScan = await scanner.getLastScanResult();
    expect(lastScan).toBeNull();
  });

  it('handles batching correctly with > BATCH_SIZE items', async () => {
    // Insert 150 downloaded items, all missing from disk
    const inserts = Array.from({ length: 150 }, (_, i) =>
      insertContent({ filePath: `/media/file-${i}.mp4`, title: `Video ${i}` })
    );
    await Promise.all(inserts);

    const scanner = new MissingFileScanner(db);
    const result = await scanner.scanAll();

    expect(result.checked).toBe(150);
    expect(result.missing).toBe(150);

    // All should be marked missing
    const rows = await db
      .select({ status: contentItems.status })
      .from(contentItems)
      .where(eq(contentItems.status, 'missing'));
    expect(rows.length).toBe(150);
  });
});
|
||||
|
|
@ -16,7 +16,7 @@ export const contentItems = sqliteTable('content_items', {
|
|||
fileSize: integer('file_size'), // bytes
|
||||
format: text('format'), // container format e.g. 'mp4', 'webm', 'mp3'
|
||||
qualityMetadata: text('quality_metadata', { mode: 'json' }), // actual quality info post-download
|
||||
status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored
|
||||
status: text('status').notNull().default('monitored'), // monitored|queued|downloading|downloaded|failed|ignored|missing
|
||||
thumbnailUrl: text('thumbnail_url'),
|
||||
publishedAt: text('published_at'), // ISO datetime from platform (nullable)
|
||||
downloadedAt: text('downloaded_at'), // ISO datetime when download completed (nullable)
|
||||
|
|
|
|||
171
src/services/missing-file-scanner.ts
Normal file
171
src/services/missing-file-scanner.ts
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
import { eq, and, isNotNull, sql } from 'drizzle-orm';
|
||||
import type { LibSQLDatabase } from 'drizzle-orm/libsql';
|
||||
import type * as schema from '../db/schema/index';
|
||||
import { contentItems } from '../db/schema/index';
|
||||
import { systemConfig } from '../db/schema/index';
|
||||
import { access } from 'node:fs/promises';
|
||||
|
||||
// ── Types ──
|
||||
|
||||
/** Summary of one scan run. */
export interface ScanResult {
  /** Number of rows whose file path was checked on disk. */
  checked: number;
  /** Number of checked rows whose file was not found. */
  missing: number;
  /** Wall-clock time of the scan, in milliseconds. */
  duration: number;
}

/** Projection of a content item selected for checking: its id and file path. */
interface DownloadedRow {
  id: number;
  filePath: string;
}

/** Drizzle libSQL database handle typed with the project schema. */
type Db = LibSQLDatabase<typeof schema>;

// ── Constants ──

/** Maximum number of rows fetched and checked per query. */
const BATCH_SIZE = 100;

/** system_config key under which the timestamp of the last scan is stored. */
const SCAN_LAST_RUN_KEY = 'missing_file_scan_last_run';

/** system_config key under which the JSON-encoded result of the last scan is stored. */
const SCAN_LAST_RESULT_KEY = 'missing_file_scan_last_result';
|
||||
|
||||
// ── Scanner ──
|
||||
|
||||
/**
 * Detects content items recorded as downloaded whose file is no longer on disk
 * and flags them with status 'missing'. The outcome of the most recent scan is
 * stored in system_config.
 */
export class MissingFileScanner {
  constructor(private readonly db: Db) {}

  /**
   * Scan all content items with status='downloaded' and a non-null, non-empty filePath.
   * For each, check if the file exists on disk. If not, update status to 'missing'.
   * Works in batches of BATCH_SIZE to bound memory usage on large libraries.
   *
   * @returns counts of checked and missing items plus the elapsed time in milliseconds.
   */
  async scanAll(): Promise<ScanResult> {
    const start = Date.now();
    let checked = 0;
    let missing = 0;
    let lastId = 0;

    console.log('[missing-file-scanner] Scan started');

    // Cursor-based pagination: since we mutate status from 'downloaded' to 'missing'
    // during iteration, offset-based pagination would skip rows. Using id > lastId
    // ensures we always pick up the next unconsumed batch.
    while (true) {
      const batch = await this.db
        .select({ id: contentItems.id, filePath: contentItems.filePath })
        .from(contentItems)
        .where(
          and(
            eq(contentItems.status, 'downloaded'),
            isNotNull(contentItems.filePath),
            sql`${contentItems.filePath} != ''`,
            sql`${contentItems.id} > ${lastId}`
          )
        )
        .orderBy(contentItems.id)
        .limit(BATCH_SIZE);

      if (batch.length === 0) break;

      const missingIds: number[] = [];

      // The WHERE clause above already excludes null paths, hence the cast.
      for (const row of batch as DownloadedRow[]) {
        checked++;
        const exists = await fileExists(row.filePath);
        if (!exists) {
          missingIds.push(row.id);
          missing++;
          console.log(`[missing-file-scanner] File missing: id=${row.id} path=${row.filePath}`);
        }
      }

      // Batch-update missing items
      if (missingIds.length > 0) {
        await this.markMissing(missingIds);
      }

      // Advance cursor to the last processed id
      lastId = batch[batch.length - 1].id as number;

      // If batch was smaller than BATCH_SIZE, we've exhausted the result set
      if (batch.length < BATCH_SIZE) break;
    }

    const duration = Date.now() - start;
    const result: ScanResult = { checked, missing, duration };

    console.log(`[missing-file-scanner] Scan completed: checked=${checked} missing=${missing} duration=${duration}ms`);

    // Persist scan metadata
    await this.persistScanResult(result);

    return result;
  }

  /**
   * Get the last scan result from system_config.
   * Returns null if no scan has been run yet. If the timestamp row exists but the
   * stored result is absent or unreadable, an all-zero result is returned.
   */
  async getLastScanResult(): Promise<{ lastRun: string; result: ScanResult } | null> {
    const [runRow] = await this.db
      .select({ key: systemConfig.key, value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RUN_KEY));

    if (!runRow) return null;

    const [resultRow] = await this.db
      .select({ value: systemConfig.value })
      .from(systemConfig)
      .where(eq(systemConfig.key, SCAN_LAST_RESULT_KEY));

    return {
      lastRun: runRow.value,
      result: resultRow
        ? MissingFileScanner.parseStoredResult(resultRow.value)
        : { checked: 0, missing: 0, duration: 0 },
    };
  }

  // ── Private ──

  /**
   * Decode a stored scan result. Malformed JSON or a payload without numeric
   * checked/missing/duration fields yields the all-zero result instead of throwing.
   */
  private static parseStoredResult(raw: string): ScanResult {
    const fallback: ScanResult = { checked: 0, missing: 0, duration: 0 };

    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      console.warn('[missing-file-scanner] Stored scan result is not valid JSON; ignoring it');
      return fallback;
    }

    if (typeof parsed !== 'object' || parsed === null) return fallback;

    const { checked, missing, duration } = parsed as Record<string, unknown>;
    if (typeof checked !== 'number' || typeof missing !== 'number' || typeof duration !== 'number') {
      return fallback;
    }
    return { checked, missing, duration };
  }

  /**
   * Set status='missing' on the given ids.
   * The update is restricted to rows that are still 'downloaded' so that a status
   * changed by someone else between the batch read and this write is not overwritten.
   */
  private async markMissing(ids: number[]): Promise<void> {
    if (ids.length === 0) return;

    // SQLite has a variable limit; chunk if needed, but BATCH_SIZE=100 is well within limits
    await this.db
      .update(contentItems)
      .set({
        status: 'missing',
        updatedAt: sql`(datetime('now'))`,
      })
      .where(
        and(
          sql`${contentItems.id} IN (${sql.join(ids.map((id) => sql`${id}`), sql`, `)})`,
          eq(contentItems.status, 'downloaded')
        )
      );
  }

  /** Store the current time and the JSON-encoded result under their system_config keys. */
  private async persistScanResult(result: ScanResult): Promise<void> {
    const now = new Date().toISOString();
    const resultJson = JSON.stringify(result);

    // Upsert last run timestamp
    await this.upsertConfig(SCAN_LAST_RUN_KEY, now);

    // Upsert last result
    await this.upsertConfig(SCAN_LAST_RESULT_KEY, resultJson);
  }

  /** Insert a system_config row, or overwrite value/updatedAt when the key already exists. */
  private async upsertConfig(key: string, value: string): Promise<void> {
    await this.db
      .insert(systemConfig)
      .values({ key, value })
      .onConflictDoUpdate({
        target: systemConfig.key,
        set: { value, updatedAt: sql`(datetime('now'))` },
      });
  }
}
|
||||
|
||||
// ── Helpers ──
|
||||
|
||||
/**
 * Report whether `filePath` can be confirmed as absent from disk.
 *
 * @param filePath path to probe with `access`.
 * @returns `false` only when the probe fails with ENOENT or ENOTDIR (the path
 *   does not exist). Any other failure (e.g. a permission or I/O error) leaves
 *   existence undetermined; it is logged and `true` is returned so the caller
 *   does not flag a file as missing on inconclusive evidence.
 */
async function fileExists(filePath: string): Promise<boolean> {
  try {
    await access(filePath);
    return true;
  } catch (err: unknown) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;

    if (code === 'ENOENT' || code === 'ENOTDIR') return false;

    console.warn(
      `[missing-file-scanner] Could not determine existence: path=${filePath} code=${String(code)}`
    );
    return true;
  }
}
|
||||
|
|
@ -21,6 +21,7 @@ export const ContentStatus = {
|
|||
Downloaded: 'downloaded',
|
||||
Failed: 'failed',
|
||||
Ignored: 'ignored',
|
||||
Missing: 'missing',
|
||||
} as const;
|
||||
export type ContentStatus = (typeof ContentStatus)[keyof typeof ContentStatus];
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue