feat: Wire keyword filter into scheduler scan flow — exclude/include patterns
- "src/services/scheduler.ts" - "src/__tests__/scheduler.test.ts" - "src/db/repositories/channel-repository.ts" GSD-Task: S03/T03
This commit is contained in:
parent
cc031a78a9
commit
05045828d8
3 changed files with 189 additions and 2 deletions
|
|
@ -659,6 +659,180 @@ describe('SchedulerService', () => {
|
|||
scheduler.stop();
|
||||
});
|
||||
|
||||
// ── Keyword filter tests ──
|
||||
|
||||
it('excludes items matching excludeKeywords pattern', async () => {
  // Channel configured with an exclude pattern only.
  const channel = await insertTestChannel({ excludeKeywords: 'shorts|#shorts' });
  const scheduler = new SchedulerService(db, registry, rateLimiter);
  const idPrefix = `kf_exc_${channel.id}`;

  // Fixture rows as [title, duration]; IDs and URLs are derived from the 1-based position.
  const fixtures: Array<[string, number]> = [
    ['Great Video About Coding', 600],
    ['Quick shorts compilation', 30],
    ['My Day #shorts vlog', 15],
  ];
  const items: PlatformContentMetadata[] = fixtures.map(
    ([title, duration], index): PlatformContentMetadata => ({
      platformContentId: `${idPrefix}_${index + 1}`,
      title,
      url: `https://www.youtube.com/watch?v=${index + 1}`,
      contentType: 'video',
      duration,
      thumbnailUrl: null,
      publishedAt: null,
    })
  );
  mockFetchRecentContent.mockResolvedValueOnce(items);

  const { newItems, totalFetched } = await scheduler.checkChannel(channel);

  // Only the first item should pass the filter, while all three count as fetched.
  expect(newItems).toBe(1);
  expect(totalFetched).toBe(3);

  // Verify what was actually persisted for this test's ID prefix.
  const stored = (await getContentByChannelId(db, channel.id)).filter((row) =>
    row.platformContentId.startsWith(idPrefix)
  );
  expect(stored.length).toBe(1);
  expect(stored[0].title).toBe('Great Video About Coding');

  scheduler.stop();
});
|
||||
|
||||
it('includes only items matching includeKeywords pattern', async () => {
  // Channel configured with an include pattern only.
  const channel = await insertTestChannel({ includeKeywords: 'tutorial|guide' });
  const scheduler = new SchedulerService(db, registry, rateLimiter);
  const idPrefix = `kf_inc_${channel.id}`;

  // Fixture rows as [title, duration]; IDs and URLs are derived from the 1-based position.
  const fixtures: Array<[string, number]> = [
    ['Python Tutorial for Beginners', 1800],
    ['Random Vlog Day 5', 300],
    ['Ultimate Guide to Docker', 2400],
  ];
  const items: PlatformContentMetadata[] = fixtures.map(
    ([title, duration], index): PlatformContentMetadata => ({
      platformContentId: `${idPrefix}_${index + 1}`,
      title,
      url: `https://www.youtube.com/watch?v=${index + 1}`,
      contentType: 'video',
      duration,
      thumbnailUrl: null,
      publishedAt: null,
    })
  );
  mockFetchRecentContent.mockResolvedValueOnce(items);

  const { newItems } = await scheduler.checkChannel(channel);

  // Two of the three titles are expected to be kept.
  expect(newItems).toBe(2);

  // Verify what was actually persisted for this test's ID prefix.
  const stored = (await getContentByChannelId(db, channel.id)).filter((row) =>
    row.platformContentId.startsWith(idPrefix)
  );
  expect(stored.length).toBe(2);

  const storedTitles = stored.map((row) => row.title);
  expect(storedTitles).toContain('Python Tutorial for Beginners');
  expect(storedTitles).toContain('Ultimate Guide to Docker');

  scheduler.stop();
});
|
||||
|
||||
it('applies both include and exclude patterns together', async () => {
  // Channel configured with both an include and an exclude pattern.
  const channel = await insertTestChannel({
    includeKeywords: 'tutorial',
    excludeKeywords: 'shorts',
  });
  const scheduler = new SchedulerService(db, registry, rateLimiter);
  const idPrefix = `kf_both_${channel.id}`;

  // Fixture rows as [title, duration]; IDs and URLs are derived from the 1-based position.
  const fixtures: Array<[string, number]> = [
    ['Tutorial: Getting Started', 1800],
    ['Tutorial shorts recap', 30],
    ['Random Gaming Stream', 7200],
  ];
  const items: PlatformContentMetadata[] = fixtures.map(
    ([title, duration], index): PlatformContentMetadata => ({
      platformContentId: `${idPrefix}_${index + 1}`,
      title,
      url: `https://www.youtube.com/watch?v=${index + 1}`,
      contentType: 'video',
      duration,
      thumbnailUrl: null,
      publishedAt: null,
    })
  );
  mockFetchRecentContent.mockResolvedValueOnce(items);

  const { newItems } = await scheduler.checkChannel(channel);

  // Item 1: matches include, no exclude match → pass
  // Item 2: matches include AND exclude → excluded (exclude wins)
  // Item 3: doesn't match include → excluded
  expect(newItems).toBe(1);

  // Verify what was actually persisted for this test's ID prefix.
  const stored = (await getContentByChannelId(db, channel.id)).filter((row) =>
    row.platformContentId.startsWith(idPrefix)
  );
  expect(stored.length).toBe(1);
  expect(stored[0].title).toBe('Tutorial: Getting Started');

  scheduler.stop();
});
|
||||
|
||||
it('does not filter when no keywords are set', async () => {
  // Both keyword settings are explicitly null for this channel.
  const channel = await insertTestChannel({ includeKeywords: null, excludeKeywords: null });
  const scheduler = new SchedulerService(db, registry, rateLimiter);

  // Four canned items are queued as the next fetch result.
  const canned = makeCannedContent(4, `kf_none_${channel.id}`);
  mockFetchRecentContent.mockResolvedValueOnce(canned);

  const { newItems } = await scheduler.checkChannel(channel);

  // All four items are expected to be reported as new.
  expect(newItems).toBe(4);

  scheduler.stop();
});
|
||||
|
||||
// ── monitoringMode-aware item creation tests ──
|
||||
|
||||
it("creates items with monitored=false when channel monitoringMode is 'none'", async () => {
|
||||
|
|
|
|||
|
|
@ -49,6 +49,8 @@ export async function createChannel(
|
|||
bannerUrl: data.bannerUrl ?? null,
|
||||
description: data.description ?? null,
|
||||
subscriberCount: data.subscriberCount ?? null,
|
||||
includeKeywords: data.includeKeywords ?? null,
|
||||
excludeKeywords: data.excludeKeywords ?? null,
|
||||
})
|
||||
.returning();
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import type { PlatformRegistry, PlatformSource, FetchRecentContentOptions } from
|
|||
import type { RateLimiter } from './rate-limiter';
|
||||
import { YtDlpError } from '../sources/yt-dlp';
|
||||
import type { EventBus } from './event-bus';
|
||||
import { matchesKeywordFilter } from './keyword-filter';
|
||||
import {
|
||||
getEnabledChannels,
|
||||
updateChannel,
|
||||
|
|
@ -236,9 +237,19 @@ export class SchedulerService {
|
|||
(item) => !existingIds.has(item.platformContentId)
|
||||
);
|
||||
|
||||
// 6b. Apply keyword filter — exclude/include patterns from channel settings
|
||||
const filteredItems = newItems.filter((item) =>
|
||||
matchesKeywordFilter(item.title, channel.includeKeywords, channel.excludeKeywords)
|
||||
);
|
||||
if (filteredItems.length < newItems.length) {
|
||||
console.log(
|
||||
`[scheduler] Keyword filter: ${newItems.length - filteredItems.length} of ${newItems.length} new items filtered out for channel ${channel.id}`
|
||||
);
|
||||
}
|
||||
|
||||
// 7. Insert new items (check abort between each)
|
||||
let insertedCount = 0;
|
||||
for (const item of newItems) {
|
||||
for (const item of filteredItems) {
|
||||
// Check if scan was cancelled
|
||||
if (effectiveSignal.aborted) {
|
||||
console.log(
|
||||
|
|
@ -310,7 +321,7 @@ export class SchedulerService {
|
|||
// This runs after the scan result is returned — enrichment updates DB records
|
||||
// and triggers a final cache invalidation when done.
|
||||
if (insertedCount > 0 && !effectiveSignal.aborted) {
|
||||
this.enrichNewItems(channel, newItems, existingIds, rateLimitDelay, source, effectiveSignal)
|
||||
this.enrichNewItems(channel, filteredItems, existingIds, rateLimitDelay, source, effectiveSignal)
|
||||
.catch((err) => {
|
||||
console.error(
|
||||
`[scheduler] Background enrichment failed for channel ${channel.id}:`,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue