feat: Wire keyword filter into scheduler scan flow — exclude/include pa…
- "src/services/scheduler.ts"
- "src/__tests__/scheduler.test.ts"
- "src/db/repositories/channel-repository.ts"

GSD-Task: S03/T03
This commit is contained in:
parent
cc031a78a9
commit
05045828d8
3 changed files with 189 additions and 2 deletions
|
|
@ -659,6 +659,180 @@ describe('SchedulerService', () => {
|
||||||
scheduler.stop();
|
scheduler.stop();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ── Keyword filter tests ──
|
||||||
|
|
||||||
|
it('excludes items matching excludeKeywords pattern', async () => {
  // Channel configured with an exclude pattern only.
  const channel = await insertTestChannel({ excludeKeywords: 'shorts|#shorts' });
  const scheduler = new SchedulerService(db, registry, rateLimiter);

  // Fields that are the same for every fetched item in this test.
  const videoDefaults = {
    contentType: 'video',
    thumbnailUrl: null,
    publishedAt: null,
  } as const;

  // One title without the excluded words, two titles containing "shorts".
  const fetched: PlatformContentMetadata[] = [
    {
      ...videoDefaults,
      platformContentId: `kf_exc_${channel.id}_1`,
      title: 'Great Video About Coding',
      url: 'https://www.youtube.com/watch?v=1',
      duration: 600,
    },
    {
      ...videoDefaults,
      platformContentId: `kf_exc_${channel.id}_2`,
      title: 'Quick shorts compilation',
      url: 'https://www.youtube.com/watch?v=2',
      duration: 30,
    },
    {
      ...videoDefaults,
      platformContentId: `kf_exc_${channel.id}_3`,
      title: 'My Day #shorts vlog',
      url: 'https://www.youtube.com/watch?v=3',
      duration: 15,
    },
  ];
  mockFetchRecentContent.mockResolvedValueOnce(fetched);

  const { newItems, totalFetched } = await scheduler.checkChannel(channel);

  // All three items are fetched, but only the first is expected to be inserted.
  expect(newItems).toBe(1);
  expect(totalFetched).toBe(3);

  // Confirm against the stored rows, restricted to this test's ID prefix.
  const stored = await getContentByChannelId(db, channel.id);
  const survivors = stored.filter((row) =>
    row.platformContentId.startsWith(`kf_exc_${channel.id}`)
  );
  expect(survivors.length).toBe(1);
  expect(survivors[0].title).toBe('Great Video About Coding');

  scheduler.stop();
});
|
||||||
|
|
||||||
|
it('includes only items matching includeKeywords pattern', async () => {
  // Channel configured with an include pattern only.
  const channel = await insertTestChannel({ includeKeywords: 'tutorial|guide' });
  const scheduler = new SchedulerService(db, registry, rateLimiter);

  // Fields that are the same for every fetched item in this test.
  const videoDefaults = {
    contentType: 'video',
    thumbnailUrl: null,
    publishedAt: null,
  } as const;

  // Titles use capitalised "Tutorial"/"Guide" while the pattern is lowercase;
  // the expectations below rely on those two still counting as matches.
  const fetched: PlatformContentMetadata[] = [
    {
      ...videoDefaults,
      platformContentId: `kf_inc_${channel.id}_1`,
      title: 'Python Tutorial for Beginners',
      url: 'https://www.youtube.com/watch?v=1',
      duration: 1800,
    },
    {
      ...videoDefaults,
      platformContentId: `kf_inc_${channel.id}_2`,
      title: 'Random Vlog Day 5',
      url: 'https://www.youtube.com/watch?v=2',
      duration: 300,
    },
    {
      ...videoDefaults,
      platformContentId: `kf_inc_${channel.id}_3`,
      title: 'Ultimate Guide to Docker',
      url: 'https://www.youtube.com/watch?v=3',
      duration: 2400,
    },
  ];
  mockFetchRecentContent.mockResolvedValueOnce(fetched);

  const { newItems } = await scheduler.checkChannel(channel);

  // Items 1 and 3 are expected to be kept; the vlog is expected to be dropped.
  expect(newItems).toBe(2);

  // Confirm against the stored rows, restricted to this test's ID prefix.
  const stored = await getContentByChannelId(db, channel.id);
  const survivors = stored.filter((row) =>
    row.platformContentId.startsWith(`kf_inc_${channel.id}`)
  );
  expect(survivors.length).toBe(2);

  const keptTitles = survivors.map((row) => row.title);
  expect(keptTitles).toContain('Python Tutorial for Beginners');
  expect(keptTitles).toContain('Ultimate Guide to Docker');

  scheduler.stop();
});
|
||||||
|
|
||||||
|
it('applies both include and exclude patterns together', async () => {
  // Channel configured with an include pattern and an exclude pattern at once.
  const channel = await insertTestChannel({
    includeKeywords: 'tutorial',
    excludeKeywords: 'shorts',
  });
  const scheduler = new SchedulerService(db, registry, rateLimiter);

  // Fields that are the same for every fetched item in this test.
  const videoDefaults = {
    contentType: 'video',
    thumbnailUrl: null,
    publishedAt: null,
  } as const;

  const fetched: PlatformContentMetadata[] = [
    {
      // Matches include, no exclude match → expected to pass.
      ...videoDefaults,
      platformContentId: `kf_both_${channel.id}_1`,
      title: 'Tutorial: Getting Started',
      url: 'https://www.youtube.com/watch?v=1',
      duration: 1800,
    },
    {
      // Matches include AND exclude → expected to be dropped (exclude wins).
      ...videoDefaults,
      platformContentId: `kf_both_${channel.id}_2`,
      title: 'Tutorial shorts recap',
      url: 'https://www.youtube.com/watch?v=2',
      duration: 30,
    },
    {
      // Does not match include → expected to be dropped.
      ...videoDefaults,
      platformContentId: `kf_both_${channel.id}_3`,
      title: 'Random Gaming Stream',
      url: 'https://www.youtube.com/watch?v=3',
      duration: 7200,
    },
  ];
  mockFetchRecentContent.mockResolvedValueOnce(fetched);

  const { newItems } = await scheduler.checkChannel(channel);
  expect(newItems).toBe(1);

  // Confirm against the stored rows, restricted to this test's ID prefix.
  const stored = await getContentByChannelId(db, channel.id);
  const survivors = stored.filter((row) =>
    row.platformContentId.startsWith(`kf_both_${channel.id}`)
  );
  expect(survivors.length).toBe(1);
  expect(survivors[0].title).toBe('Tutorial: Getting Started');

  scheduler.stop();
});
|
||||||
|
|
||||||
|
it('does not filter when no keywords are set', async () => {
  // Both keyword settings are explicitly null for this channel.
  const channel = await insertTestChannel({
    includeKeywords: null,
    excludeKeywords: null,
  });
  const scheduler = new SchedulerService(db, registry, rateLimiter);

  // Four canned items, with IDs prefixed per channel.
  const canned = makeCannedContent(4, `kf_none_${channel.id}`);
  mockFetchRecentContent.mockResolvedValueOnce(canned);

  const { newItems } = await scheduler.checkChannel(channel);

  // With no patterns configured, every fetched item is expected to count as new.
  expect(newItems).toBe(4);

  scheduler.stop();
});
|
||||||
|
|
||||||
// ── monitoringMode-aware item creation tests ──
|
// ── monitoringMode-aware item creation tests ──
|
||||||
|
|
||||||
it("creates items with monitored=false when channel monitoringMode is 'none'", async () => {
|
it("creates items with monitored=false when channel monitoringMode is 'none'", async () => {
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,8 @@ export async function createChannel(
|
||||||
bannerUrl: data.bannerUrl ?? null,
|
bannerUrl: data.bannerUrl ?? null,
|
||||||
description: data.description ?? null,
|
description: data.description ?? null,
|
||||||
subscriberCount: data.subscriberCount ?? null,
|
subscriberCount: data.subscriberCount ?? null,
|
||||||
|
includeKeywords: data.includeKeywords ?? null,
|
||||||
|
excludeKeywords: data.excludeKeywords ?? null,
|
||||||
})
|
})
|
||||||
.returning();
|
.returning();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import type { PlatformRegistry, PlatformSource, FetchRecentContentOptions } from
|
||||||
import type { RateLimiter } from './rate-limiter';
|
import type { RateLimiter } from './rate-limiter';
|
||||||
import { YtDlpError } from '../sources/yt-dlp';
|
import { YtDlpError } from '../sources/yt-dlp';
|
||||||
import type { EventBus } from './event-bus';
|
import type { EventBus } from './event-bus';
|
||||||
|
import { matchesKeywordFilter } from './keyword-filter';
|
||||||
import {
|
import {
|
||||||
getEnabledChannels,
|
getEnabledChannels,
|
||||||
updateChannel,
|
updateChannel,
|
||||||
|
|
@ -236,9 +237,19 @@ export class SchedulerService {
|
||||||
(item) => !existingIds.has(item.platformContentId)
|
(item) => !existingIds.has(item.platformContentId)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// 6b. Apply keyword filter — exclude/include patterns from channel settings
|
||||||
|
const filteredItems = newItems.filter((item) =>
|
||||||
|
matchesKeywordFilter(item.title, channel.includeKeywords, channel.excludeKeywords)
|
||||||
|
);
|
||||||
|
if (filteredItems.length < newItems.length) {
|
||||||
|
console.log(
|
||||||
|
`[scheduler] Keyword filter: ${newItems.length - filteredItems.length} of ${newItems.length} new items filtered out for channel ${channel.id}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// 7. Insert new items (check abort between each)
|
// 7. Insert new items (check abort between each)
|
||||||
let insertedCount = 0;
|
let insertedCount = 0;
|
||||||
for (const item of newItems) {
|
for (const item of filteredItems) {
|
||||||
// Check if scan was cancelled
|
// Check if scan was cancelled
|
||||||
if (effectiveSignal.aborted) {
|
if (effectiveSignal.aborted) {
|
||||||
console.log(
|
console.log(
|
||||||
|
|
@ -310,7 +321,7 @@ export class SchedulerService {
|
||||||
// This runs after the scan result is returned — enrichment updates DB records
|
// This runs after the scan result is returned — enrichment updates DB records
|
||||||
// and triggers a final cache invalidation when done.
|
// and triggers a final cache invalidation when done.
|
||||||
if (insertedCount > 0 && !effectiveSignal.aborted) {
|
if (insertedCount > 0 && !effectiveSignal.aborted) {
|
||||||
this.enrichNewItems(channel, newItems, existingIds, rateLimitDelay, source, effectiveSignal)
|
this.enrichNewItems(channel, filteredItems, existingIds, rateLimitDelay, source, effectiveSignal)
|
||||||
.catch((err) => {
|
.catch((err) => {
|
||||||
console.error(
|
console.error(
|
||||||
`[scheduler] Background enrichment failed for channel ${channel.id}:`,
|
`[scheduler] Background enrichment failed for channel ${channel.id}:`,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue