diff --git a/drizzle/0011_add_youtube_enhancements.sql b/drizzle/0011_add_youtube_enhancements.sql new file mode 100644 index 0000000..3e4d45d --- /dev/null +++ b/drizzle/0011_add_youtube_enhancements.sql @@ -0,0 +1,4 @@ +-- Add YouTube enhancement columns to format_profiles +ALTER TABLE format_profiles ADD COLUMN embed_chapters INTEGER NOT NULL DEFAULT 0; +ALTER TABLE format_profiles ADD COLUMN embed_thumbnail INTEGER NOT NULL DEFAULT 0; +ALTER TABLE format_profiles ADD COLUMN sponsor_block_remove TEXT; -- comma-separated: 'sponsor,selfpromo,interaction,intro,outro,preview,music_offtopic,filler' diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 445549e..9320eb7 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -78,6 +78,13 @@ "when": 1775196046744, "tag": "0010_special_ghost_rider", "breakpoints": true + }, + { + "idx": 11, + "version": "7", + "when": 1775253600000, + "tag": "0011_add_youtube_enhancements", + "breakpoints": true } ] -} \ No newline at end of file +} diff --git a/src/__tests__/download.test.ts b/src/__tests__/download.test.ts index 3f83acb..0fe8e8b 100644 --- a/src/__tests__/download.test.ts +++ b/src/__tests__/download.test.ts @@ -334,7 +334,7 @@ describe('DownloadService', () => { containerFormat: 'mkv', isDefault: false, subtitleLanguages: null, - embedSubtitles: false, + embedSubtitles: false, embedChapters: false, embedThumbnail: false, sponsorBlockRemove: null, createdAt: '', updatedAt: '', }; @@ -388,7 +388,7 @@ describe('DownloadService', () => { containerFormat: null, isDefault: false, subtitleLanguages: null, - embedSubtitles: false, + embedSubtitles: false, embedChapters: false, embedThumbnail: false, sponsorBlockRemove: null, createdAt: '', updatedAt: '', }; @@ -642,7 +642,7 @@ describe('DownloadService', () => { containerFormat: null, isDefault: false, subtitleLanguages: null, - embedSubtitles: false, + embedSubtitles: false, embedChapters: false, embedThumbnail: false, 
sponsorBlockRemove: null, createdAt: '', updatedAt: '', }; @@ -686,7 +686,7 @@ describe('DownloadService', () => { containerFormat: 'mkv', isDefault: false, subtitleLanguages: null, - embedSubtitles: false, + embedSubtitles: false, embedChapters: false, embedThumbnail: false, sponsorBlockRemove: null, createdAt: '', updatedAt: '', }; @@ -738,7 +738,7 @@ describe('DownloadService', () => { containerFormat: null, isDefault: false, subtitleLanguages: null, - embedSubtitles: false, + embedSubtitles: false, embedChapters: false, embedThumbnail: false, sponsorBlockRemove: null, createdAt: '', updatedAt: '', }; diff --git a/src/__tests__/subtitle-download.test.ts b/src/__tests__/subtitle-download.test.ts index b542e62..c3c1d05 100644 --- a/src/__tests__/subtitle-download.test.ts +++ b/src/__tests__/subtitle-download.test.ts @@ -148,7 +148,7 @@ function makeProfile(overrides: Partial = {}): FormatProfile { containerFormat: 'mp4', isDefault: false, subtitleLanguages: null, - embedSubtitles: false, + embedSubtitles: false, embedChapters: false, embedThumbnail: false, sponsorBlockRemove: null, createdAt: '', updatedAt: '', ...overrides, diff --git a/src/db/repositories/format-profile-repository.ts b/src/db/repositories/format-profile-repository.ts index 2900891..95c95ca 100644 --- a/src/db/repositories/format-profile-repository.ts +++ b/src/db/repositories/format-profile-repository.ts @@ -16,6 +16,9 @@ export interface CreateFormatProfileData { isDefault?: boolean; subtitleLanguages?: string | null; embedSubtitles?: boolean; + embedChapters?: boolean; + embedThumbnail?: boolean; + sponsorBlockRemove?: string | null; } /** Fields that can be updated on an existing format profile. 
*/ @@ -28,6 +31,9 @@ export interface UpdateFormatProfileData { isDefault?: boolean; subtitleLanguages?: string | null; embedSubtitles?: boolean; + embedChapters?: boolean; + embedThumbnail?: boolean; + sponsorBlockRemove?: string | null; } type Db = LibSQLDatabase; @@ -60,6 +66,9 @@ export async function createFormatProfile( isDefault: data.isDefault ?? false, subtitleLanguages: data.subtitleLanguages ?? null, embedSubtitles: data.embedSubtitles ?? false, + embedChapters: data.embedChapters ?? false, + embedThumbnail: data.embedThumbnail ?? false, + sponsorBlockRemove: data.sponsorBlockRemove ?? null, }) .returning(); @@ -180,6 +189,9 @@ function mapRow(row: typeof formatProfiles.$inferSelect): FormatProfile { isDefault: row.isDefault, subtitleLanguages: row.subtitleLanguages ?? null, embedSubtitles: row.embedSubtitles, + embedChapters: row.embedChapters, + embedThumbnail: row.embedThumbnail, + sponsorBlockRemove: row.sponsorBlockRemove ?? null, createdAt: row.createdAt, updatedAt: row.updatedAt, }; diff --git a/src/db/schema/content.ts b/src/db/schema/content.ts index d5770fe..8fc22dd 100644 --- a/src/db/schema/content.ts +++ b/src/db/schema/content.ts @@ -41,6 +41,9 @@ export const formatProfiles = sqliteTable('format_profiles', { isDefault: integer('is_default', { mode: 'boolean' }).notNull().default(false), subtitleLanguages: text('subtitle_languages'), embedSubtitles: integer('embed_subtitles', { mode: 'boolean' }).notNull().default(false), + embedChapters: integer('embed_chapters', { mode: 'boolean' }).notNull().default(false), + embedThumbnail: integer('embed_thumbnail', { mode: 'boolean' }).notNull().default(false), + sponsorBlockRemove: text('sponsor_block_remove'), // comma-separated categories: 'sponsor,selfpromo,interaction,intro,outro,preview,music_offtopic,filler' createdAt: text('created_at') .notNull() .default(sql`(datetime('now'))`), diff --git a/src/frontend/src/components/AddChannelModal.tsx b/src/frontend/src/components/AddChannelModal.tsx 
index c622957..7deb7c7 100644 --- a/src/frontend/src/components/AddChannelModal.tsx +++ b/src/frontend/src/components/AddChannelModal.tsx @@ -31,12 +31,18 @@ function detectPlatform(url: string): Platform | null { return 'soundcloud'; } + // Any valid URL → Generic (yt-dlp supports 1000+ sites) + if (/^https?:\/\/.+/.test(url)) { + return 'generic'; + } + return null; } const PLATFORM_LABELS: Record = { youtube: 'YouTube', soundcloud: 'SoundCloud', + generic: 'Generic', }; // ── Component ── diff --git a/src/frontend/src/components/PlatformBadge.tsx b/src/frontend/src/components/PlatformBadge.tsx index 1a785b3..818bdb9 100644 --- a/src/frontend/src/components/PlatformBadge.tsx +++ b/src/frontend/src/components/PlatformBadge.tsx @@ -5,6 +5,7 @@ import type { Platform } from '@shared/types/index'; const PLATFORM_STYLES: Record = { youtube: { color: '#ff0000', label: 'YouTube' }, soundcloud: { color: '#ff7700', label: 'SoundCloud' }, + generic: { color: '#6366f1', label: 'Generic' }, }; const DEFAULT_STYLE = { color: 'var(--text-secondary)', label: 'Unknown' }; diff --git a/src/index.ts b/src/index.ts index 598ac22..2b520f8 100644 --- a/src/index.ts +++ b/src/index.ts @@ -22,6 +22,7 @@ import { HealthService } from './services/health'; import { PlatformRegistry } from './sources/platform-source'; import { YouTubeSource } from './sources/youtube'; import { SoundCloudSource } from './sources/soundcloud'; +import { GenericSource } from './sources/generic'; import { Platform } from './types/index'; import { getYtDlpVersion, updateYtDlp } from './sources/yt-dlp'; import type { ViteDevServer } from 'vite'; @@ -138,6 +139,7 @@ async function main(): Promise { const platformRegistry = new PlatformRegistry(); platformRegistry.register(Platform.YouTube, new YouTubeSource()); platformRegistry.register(Platform.SoundCloud, new SoundCloudSource()); + platformRegistry.register(Platform.Generic, new GenericSource()); scheduler = new SchedulerService(db, platformRegistry, 
rateLimiter, { onNewContent: (contentItemId: number) => { diff --git a/src/server/routes/channel.ts b/src/server/routes/channel.ts index e59cfa2..a26538a 100644 --- a/src/server/routes/channel.ts +++ b/src/server/routes/channel.ts @@ -3,6 +3,7 @@ import { parseIdParam } from './helpers'; import { PlatformRegistry } from '../../sources/platform-source'; import { YouTubeSource } from '../../sources/youtube'; import { SoundCloudSource } from '../../sources/soundcloud'; +import { GenericSource } from '../../sources/generic'; import { YtDlpError } from '../../sources/yt-dlp'; import { Platform } from '../../types/index'; import type { MonitoringMode } from '../../types/index'; @@ -25,6 +26,7 @@ function buildDefaultRegistry(): PlatformRegistry { const registry = new PlatformRegistry(); registry.register(Platform.YouTube, new YouTubeSource()); registry.register(Platform.SoundCloud, new SoundCloudSource()); + registry.register(Platform.Generic, new GenericSource()); return registry; } diff --git a/src/server/routes/format-profile.ts b/src/server/routes/format-profile.ts index 79d0652..3eb33fe 100644 --- a/src/server/routes/format-profile.ts +++ b/src/server/routes/format-profile.ts @@ -22,6 +22,9 @@ const createFormatProfileBodySchema = { isDefault: { type: 'boolean' as const }, subtitleLanguages: { type: 'string' as const, nullable: true }, embedSubtitles: { type: 'boolean' as const }, + embedChapters: { type: 'boolean' as const }, + embedThumbnail: { type: 'boolean' as const }, + sponsorBlockRemove: { type: 'string' as const, nullable: true }, }, additionalProperties: false, }; @@ -37,6 +40,9 @@ const updateFormatProfileBodySchema = { isDefault: { type: 'boolean' as const }, subtitleLanguages: { type: 'string' as const, nullable: true }, embedSubtitles: { type: 'boolean' as const }, + embedChapters: { type: 'boolean' as const }, + embedThumbnail: { type: 'boolean' as const }, + sponsorBlockRemove: { type: 'string' as const, nullable: true }, }, additionalProperties: 
false, }; @@ -65,7 +71,7 @@ export async function formatProfileRoutes(fastify: FastifyInstance): Promise( '/api/v1/format-profile', @@ -117,7 +123,7 @@ export async function formatProfileRoutes(fastify: FastifyInstance): Promise( '/api/v1/format-profile/:id', diff --git a/src/services/download.ts b/src/services/download.ts index 0b7c0f6..5698c1c 100644 --- a/src/services/download.ts +++ b/src/services/download.ts @@ -271,6 +271,24 @@ export class DownloadService { // Subtitle support args.push(...this.buildSubtitleArgs(formatProfile)); + // Chapter embedding + if (formatProfile?.embedChapters) { + args.push('--embed-chapters'); + } + + // Thumbnail embedding + if (formatProfile?.embedThumbnail) { + args.push('--embed-thumbnail'); + } + + // SponsorBlock segment removal + if (formatProfile?.sponsorBlockRemove) { + const categories = formatProfile.sponsorBlockRemove.trim(); + if (categories) { + args.push('--sponsorblock-remove', categories); + } + } + // Always include these flags args.push('--no-playlist'); args.push('--print', 'after_move:filepath'); diff --git a/src/sources/generic.ts b/src/sources/generic.ts new file mode 100644 index 0000000..64ac88a --- /dev/null +++ b/src/sources/generic.ts @@ -0,0 +1,205 @@ +import type { Channel, PlatformSourceMetadata, PlatformContentMetadata, ContentType } from '../types/index'; +import type { PlatformSource, FetchRecentContentOptions } from './platform-source'; +import { execYtDlp, parseJsonLines, parseSingleJson } from './yt-dlp'; + +/** + * Generic platform source — catch-all for any URL yt-dlp supports. + * + * Works with Vimeo, Twitch VODs, Bandcamp, Dailymotion, Twitter/X, + * Instagram, TikTok, Reddit, news sites with embedded video, blogs, + * and hundreds of other sites yt-dlp can extract from. + * + * Unlike YouTube/SoundCloud sources which use channel-level enumeration, + * the Generic source treats the channel URL as a playlist/page to scrape. 
/**
 * Generic platform source — catch-all for any URL yt-dlp supports.
 *
 * Unlike the YouTube/SoundCloud sources, which use channel-level enumeration,
 * this source treats the channel URL as a playlist/page and relies entirely
 * on yt-dlp's built-in extractors; it contains no site-specific logic.
 */
export class GenericSource implements PlatformSource {
  /**
   * Resolve a URL to channel-like metadata.
   *
   * The "channel" is whatever yt-dlp identifies as the channel, uploader or
   * playlist. This method never rejects: when yt-dlp fails, or returns
   * nothing usable, the URL's hostname becomes the name and the URL itself
   * becomes the identifier.
   *
   * @param url - Page, playlist or profile URL supplied by the user.
   * @returns Metadata describing the resolved (or fallback) channel.
   */
  async resolveChannel(url: string): Promise<PlatformSourceMetadata> {
    const fallback = (): PlatformSourceMetadata => ({
      name: hostnameOf(url) ?? 'Unknown',
      platformId: url,
      imageUrl: null,
      url,
      platform: 'generic' as const,
      description: null,
    });

    // A value starting with "-" would be parsed by yt-dlp as an option.
    if (!isSafeCliArgument(url)) {
      console.warn('[generic] Refusing to pass a non-URL value to yt-dlp; using fallback metadata');
      return fallback();
    }

    try {
      const result = await execYtDlp(
        [
          '--dump-single-json',
          '--playlist-items', '0',
          '--flat-playlist',
          url,
        ],
        { timeout: 30_000 }
      );

      const data = asRecord(parseSingleJson(result.stdout));

      // yt-dlp returns various shapes depending on the site, so every field
      // is probed through a chain of progressively less specific keys.
      return {
        name:
          firstText(data.channel, data.uploader, data.playlist_title, data.title)
          ?? hostnameOf(url)
          ?? 'Unknown',
        platformId:
          firstText(data.channel_id, data.uploader_id, data.playlist_id, data.id) ?? url,
        imageUrl: lastThumbnailUrl(data.thumbnails),
        url: firstText(data.channel_url, data.uploader_url, data.webpage_url) ?? url,
        platform: 'generic' as const,
        description: firstText(data.description),
      };
    } catch (err) {
      // Best-effort by design, but leave a trace so failures can be diagnosed.
      console.warn(
        `[generic] Could not resolve channel metadata for ${url}: ${describeError(err)}`
      );
      return fallback();
    }
  }

  /**
   * Fetch content from a generic URL.
   *
   * Phase 1 (discovery) enumerates the page/playlist with `--flat-playlist`.
   * Phase 2 (enrichment) fetches full metadata, one yt-dlp call per item,
   * for items whose id is not in `options.existingIds`. Enrichment failures
   * are logged and the flat entry is kept instead.
   *
   * @param channel - Channel whose `url` is enumerated.
   * @param options - Optional `limit` (default 50), `discoveryOnly`,
   *   `existingIds`, `rateLimitDelay` in ms (default 2000) and abort `signal`.
   * @returns Discovered items in discovery order, enriched where possible.
   * @throws Error when `channel.url` is empty or starts with "-"; also
   *   propagates whatever `execYtDlp` throws during discovery.
   */
  async fetchRecentContent(
    channel: Channel,
    options?: FetchRecentContentOptions
  ): Promise<PlatformContentMetadata[]> {
    const requestedLimit = options?.limit ?? 50;
    // Guard against 0 / negative / NaN, which would yield an invalid "1:N" range.
    const limit = Number.isFinite(requestedLimit)
      ? Math.max(1, Math.floor(requestedLimit))
      : 50;
    const discoveryOnly = options?.discoveryOnly ?? false;
    const existingIds = options?.existingIds ?? new Set<string>();
    const rateLimitDelay = options?.rateLimitDelay ?? 2000;
    const signal = options?.signal;

    if (!isSafeCliArgument(channel.url)) {
      throw new Error('[generic] Channel URL is empty or looks like a command-line option');
    }

    // Discovery: enumerate items from the URL. The timeout grows by 30s for
    // every 500 requested items on top of a 60s base.
    const discoveryTimeout = 60_000 + Math.ceil(limit / 500) * 30_000;
    const flatResult = await execYtDlp(
      [
        '--flat-playlist',
        '--dump-json',
        '--playlist-items', `1:${limit}`,
        channel.url,
      ],
      { timeout: discoveryTimeout }
    );

    const flatEntries = parseJsonLines(flatResult.stdout) as Record<string, unknown>[];
    const discoveredItems = flatEntries
      .map((entry) => mapEntry(asRecord(entry)))
      // An item without any identifier cannot be tracked or de-duplicated.
      .filter((item) => item.platformContentId !== '');

    if (discoveryOnly) {
      return discoveredItems;
    }

    // Enrichment: fetch full metadata for new items only
    const newItems = discoveredItems.filter(
      (item) => !existingIds.has(item.platformContentId)
    );

    if (newItems.length === 0) return discoveredItems;

    console.log(
      `[generic] Enriching ${newItems.length} new items (${discoveredItems.length - newItems.length} already known)`
    );

    const enrichedMap = new Map<string, PlatformContentMetadata>();
    let requestsMade = 0;

    for (const item of newItems) {
      if (signal?.aborted) {
        console.log(`[generic] Enrichment aborted after ${requestsMade} items`);
        break;
      }

      // Entry URLs come from remote pages; never hand yt-dlp something it
      // would interpret as an option, or an empty positional argument.
      if (!isSafeCliArgument(item.url)) {
        console.warn(
          `[generic] Skipping enrichment for ${item.platformContentId}: no usable URL`
        );
        continue;
      }

      if (requestsMade > 0 && rateLimitDelay > 0) {
        await sleep(rateLimitDelay, signal);
        // The wait may have been cut short by an abort; do not spawn yt-dlp then.
        if (signal?.aborted) {
          console.log(`[generic] Enrichment aborted after ${requestsMade} items`);
          break;
        }
      }
      requestsMade++;

      try {
        const enrichResult = await execYtDlp(
          ['--dump-json', '--no-playlist', item.url],
          { timeout: 30_000 }
        );
        const enriched = mapEntry(asRecord(parseSingleJson(enrichResult.stdout)));
        enrichedMap.set(item.platformContentId, {
          ...enriched,
          // Keep the discovery id: it is what later scans compare against
          // existingIds, and the full extraction may report a different one.
          platformContentId: item.platformContentId,
          url: enriched.url || item.url,
        });
      } catch (err) {
        console.warn(
          `[generic] Enrichment failed for ${item.platformContentId}: ${describeError(err)}`
        );
      }
    }

    return discoveredItems.map(
      (item) => enrichedMap.get(item.platformContentId) ?? item
    );
  }
}

// ── Helpers ──

/** Audio file extension at the end of a URL, optionally followed by a query string or fragment. */
const AUDIO_EXTENSION = /\.(mp3|flac|wav|ogg|opus|m4a|aac)(?=$|[?#])/i;

/**
 * Convert one yt-dlp JSON entry (flat or full) into content metadata.
 *
 * Missing fields degrade to neutral values: an empty `platformContentId` /
 * `url`, the title "Untitled", and `null` for duration, thumbnail and date.
 */
function mapEntry(entry: Record<string, unknown>): PlatformContentMetadata {
  const id = firstText(entry.id, entry.url) ?? '';
  const title = firstText(entry.title, entry.fulltitle) ?? 'Untitled';
  const url = firstText(entry.webpage_url, entry.url, entry.original_url) ?? '';

  // Content type detection: live status wins over audio, audio over video.
  const isLive = entry.live_status === 'is_live' || entry.live_status === 'is_upcoming';
  const isAudio = entry._type === 'audio'
    || (entry.vcodec === 'none' && entry.acodec !== 'none')
    || AUDIO_EXTENSION.test(url);
  const contentType: ContentType = isLive ? 'livestream' : isAudio ? 'audio' : 'video';

  // Duration, rounded to whole units of whatever yt-dlp reported.
  const duration =
    typeof entry.duration === 'number' && Number.isFinite(entry.duration)
      ? Math.round(entry.duration)
      : null;

  // Prefer the thumbnails list; fall back to the single `thumbnail` field.
  const thumbnailUrl = lastThumbnailUrl(entry.thumbnails) ?? firstText(entry.thumbnail);

  // Published date: yt-dlp's YYYYMMDD string, emitted as midnight UTC.
  let publishedAt: string | null = null;
  const uploadDate = entry.upload_date;
  if (typeof uploadDate === 'string' && /^\d{8}$/.test(uploadDate)) {
    publishedAt = `${uploadDate.slice(0, 4)}-${uploadDate.slice(4, 6)}-${uploadDate.slice(6, 8)}T00:00:00Z`;
  }

  return { platformContentId: id, title, url, contentType, duration, thumbnailUrl, publishedAt };
}

/** Return the first candidate that is a non-empty string or a finite number, as a string. */
function firstText(...candidates: unknown[]): string | null {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate !== '') return candidate;
    if (typeof candidate === 'number' && Number.isFinite(candidate)) return String(candidate);
  }
  return null;
}

/**
 * URL of the last element of a yt-dlp `thumbnails` array, or null.
 *
 * NOTE(review): the original code treated the last element as the best
 * quality one; that ordering is assumed here, not verified.
 */
function lastThumbnailUrl(thumbnails: unknown): string | null {
  if (!Array.isArray(thumbnails) || thumbnails.length === 0) return null;
  const last: unknown = thumbnails[thumbnails.length - 1];
  if (typeof last !== 'object' || last === null) return null;
  const candidate = (last as { url?: unknown }).url;
  return typeof candidate === 'string' && candidate !== '' ? candidate : null;
}

/** Treat parsed JSON as a key/value record; anything that is not a plain object becomes `{}`. */
function asRecord(value: unknown): Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : {};
}

/** Hostname of `url`, or null when it cannot be parsed or has no host. */
function hostnameOf(url: string): string | null {
  try {
    return new URL(url).hostname || null;
  } catch {
    return null;
  }
}

/** True when `value` can be passed to yt-dlp as a positional argument without being read as an option. */
function isSafeCliArgument(value: string): boolean {
  return value !== '' && !value.startsWith('-');
}

/** Human-readable message for an unknown thrown value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Resolve after `ms` milliseconds, or as soon as `signal` aborts.
 *
 * Never rejects; callers inspect `signal.aborted` afterwards.
 */
function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise((resolve) => {
    if (signal?.aborted) {
      resolve();
      return;
    }
    const onAbort = (): void => {
      clearTimeout(timer);
      resolve();
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener('abort', onAbort);
      resolve();
    }, ms);
    signal?.addEventListener('abort', onAbort, { once: true });
  });
}