diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts index 9a5cd70..30adce0 100644 --- a/src/__tests__/scheduler.test.ts +++ b/src/__tests__/scheduler.test.ts @@ -659,6 +659,180 @@ describe('SchedulerService', () => { scheduler.stop(); }); + // ── Keyword filter tests ── + + it('excludes items matching excludeKeywords pattern', async () => { + const channel = await insertTestChannel({ excludeKeywords: 'shorts|#shorts' }); + const scheduler = new SchedulerService(db, registry, rateLimiter); + + const items: PlatformContentMetadata[] = [ + { + platformContentId: `kf_exc_${channel.id}_1`, + title: 'Great Video About Coding', + url: 'https://www.youtube.com/watch?v=1', + contentType: 'video', + duration: 600, + thumbnailUrl: null, + publishedAt: null, + }, + { + platformContentId: `kf_exc_${channel.id}_2`, + title: 'Quick shorts compilation', + url: 'https://www.youtube.com/watch?v=2', + contentType: 'video', + duration: 30, + thumbnailUrl: null, + publishedAt: null, + }, + { + platformContentId: `kf_exc_${channel.id}_3`, + title: 'My Day #shorts vlog', + url: 'https://www.youtube.com/watch?v=3', + contentType: 'video', + duration: 15, + thumbnailUrl: null, + publishedAt: null, + }, + ]; + mockFetchRecentContent.mockResolvedValueOnce(items); + + const result = await scheduler.checkChannel(channel); + + // Only the first item should pass the filter + expect(result.newItems).toBe(1); + expect(result.totalFetched).toBe(3); + + const content = await getContentByChannelId(db, channel.id); + const inserted = content.filter(c => + c.platformContentId.startsWith(`kf_exc_${channel.id}`) + ); + expect(inserted.length).toBe(1); + expect(inserted[0].title).toBe('Great Video About Coding'); + + scheduler.stop(); + }); + + it('includes only items matching includeKeywords pattern', async () => { + const channel = await insertTestChannel({ includeKeywords: 'tutorial|guide' }); + const scheduler = new SchedulerService(db, registry, rateLimiter); + + const items: PlatformContentMetadata[] = [ + { + platformContentId: `kf_inc_${channel.id}_1`, + title: 'Python Tutorial for Beginners', + url: 'https://www.youtube.com/watch?v=1', + contentType: 'video', + duration: 1800, + thumbnailUrl: null, + publishedAt: null, + }, + { + platformContentId: `kf_inc_${channel.id}_2`, + title: 'Random Vlog Day 5', + url: 'https://www.youtube.com/watch?v=2', + contentType: 'video', + duration: 300, + thumbnailUrl: null, + publishedAt: null, + }, + { + platformContentId: `kf_inc_${channel.id}_3`, + title: 'Ultimate Guide to Docker', + url: 'https://www.youtube.com/watch?v=3', + contentType: 'video', + duration: 2400, + thumbnailUrl: null, + publishedAt: null, + }, + ]; + mockFetchRecentContent.mockResolvedValueOnce(items); + + const result = await scheduler.checkChannel(channel); + + expect(result.newItems).toBe(2); + const content = await getContentByChannelId(db, channel.id); + const inserted = content.filter(c => + c.platformContentId.startsWith(`kf_inc_${channel.id}`) + ); + expect(inserted.length).toBe(2); + const titles = inserted.map(c => c.title); + expect(titles).toContain('Python Tutorial for Beginners'); + expect(titles).toContain('Ultimate Guide to Docker'); + + scheduler.stop(); + }); + + it('applies both include and exclude patterns together', async () => { + const channel = await insertTestChannel({ + includeKeywords: 'tutorial', + excludeKeywords: 'shorts', + }); + const scheduler = new SchedulerService(db, registry, rateLimiter); + + const items: PlatformContentMetadata[] = [ + { + platformContentId: `kf_both_${channel.id}_1`, + title: 'Tutorial: Getting Started', + url: 'https://www.youtube.com/watch?v=1', + contentType: 'video', + duration: 1800, + thumbnailUrl: null, + publishedAt: null, + }, + { + platformContentId: `kf_both_${channel.id}_2`, + title: 'Tutorial shorts recap', + url: 'https://www.youtube.com/watch?v=2', + contentType: 'video', + duration: 30, + thumbnailUrl: null, + publishedAt: null, + }, + { + platformContentId: `kf_both_${channel.id}_3`, + title: 'Random Gaming Stream', + url: 'https://www.youtube.com/watch?v=3', + contentType: 'video', + duration: 7200, + thumbnailUrl: null, + publishedAt: null, + }, + ]; + mockFetchRecentContent.mockResolvedValueOnce(items); + + const result = await scheduler.checkChannel(channel); + + // Item 1: matches include, no exclude match → pass + // Item 2: matches include AND exclude → excluded (exclude wins) + // Item 3: doesn't match include → excluded + expect(result.newItems).toBe(1); + const content = await getContentByChannelId(db, channel.id); + const inserted = content.filter(c => + c.platformContentId.startsWith(`kf_both_${channel.id}`) + ); + expect(inserted.length).toBe(1); + expect(inserted[0].title).toBe('Tutorial: Getting Started'); + + scheduler.stop(); + }); + + it('does not filter when no keywords are set', async () => { + const channel = await insertTestChannel({ + includeKeywords: null, + excludeKeywords: null, + }); + const scheduler = new SchedulerService(db, registry, rateLimiter); + + mockFetchRecentContent.mockResolvedValueOnce( + makeCannedContent(4, `kf_none_${channel.id}`) + ); + + const result = await scheduler.checkChannel(channel); + expect(result.newItems).toBe(4); + + scheduler.stop(); + }); + // ── monitoringMode-aware item creation tests ── it("creates items with monitored=false when channel monitoringMode is 'none'", async () => { diff --git a/src/db/repositories/channel-repository.ts b/src/db/repositories/channel-repository.ts index 252f404..a6ffa4d 100644 --- a/src/db/repositories/channel-repository.ts +++ b/src/db/repositories/channel-repository.ts @@ -49,6 +49,8 @@ export async function createChannel( bannerUrl: data.bannerUrl ?? null, description: data.description ?? null, subscriberCount: data.subscriberCount ?? null, + includeKeywords: data.includeKeywords ?? null, + excludeKeywords: data.excludeKeywords ?? null, }) .returning(); diff --git a/src/services/scheduler.ts b/src/services/scheduler.ts index 65ce8b2..955d028 100644 --- a/src/services/scheduler.ts +++ b/src/services/scheduler.ts @@ -6,6 +6,7 @@ import type { PlatformRegistry, PlatformSource, FetchRecentContentOptions } from import type { RateLimiter } from './rate-limiter'; import { YtDlpError } from '../sources/yt-dlp'; import type { EventBus } from './event-bus'; +import { matchesKeywordFilter } from './keyword-filter'; import { getEnabledChannels, updateChannel, @@ -236,9 +237,19 @@ export class SchedulerService { (item) => !existingIds.has(item.platformContentId) ); + // 6b. Apply keyword filter — exclude/include patterns from channel settings + const filteredItems = newItems.filter((item) => + matchesKeywordFilter(item.title, channel.includeKeywords, channel.excludeKeywords) + ); + if (filteredItems.length < newItems.length) { + console.log( + `[scheduler] Keyword filter: ${newItems.length - filteredItems.length} of ${newItems.length} new items filtered out for channel ${channel.id}` + ); + } + // 7. Insert new items (check abort between each) let insertedCount = 0; - for (const item of newItems) { + for (const item of filteredItems) { // Check if scan was cancelled if (effectiveSignal.aborted) { console.log( @@ -310,7 +321,7 @@ export class SchedulerService { // This runs after the scan result is returned — enrichment updates DB records // and triggers a final cache invalidation when done. if (insertedCount > 0 && !effectiveSignal.aborted) { - this.enrichNewItems(channel, newItems, existingIds, rateLimitDelay, source, effectiveSignal) + this.enrichNewItems(channel, filteredItems, existingIds, rateLimitDelay, source, effectiveSignal) .catch((err) => { console.error( `[scheduler] Background enrichment failed for channel ${channel.id}:`,