Generic Platform: - New 'generic' platform type — catch-all for any URL yt-dlp supports - GenericSource resolves channel metadata from any URL via yt-dlp extractors - Content type auto-detection (video/audio/livestream) from yt-dlp metadata - Works with Vimeo, Twitch, Bandcamp, Dailymotion, and 1000+ other sites - Registered in both scheduler registry and channel route registry - Frontend: indigo badge, URL detection fallback, AddChannelModal support YouTube Enhancements: - embedChapters: --embed-chapters flag on FormatProfile - embedThumbnail: --embed-thumbnail flag on FormatProfile - sponsorBlockRemove: --sponsorblock-remove with configurable categories (sponsor, selfpromo, interaction, intro, outro, preview, music_offtopic, filler) - Migration 0011: adds columns to format_profiles table - All three configurable per format profile via API and (future) Settings UI
205 lines
6.7 KiB
TypeScript
205 lines
6.7 KiB
TypeScript
import type { Channel, PlatformSourceMetadata, PlatformContentMetadata, ContentType } from '../types/index';
|
|
import type { PlatformSource, FetchRecentContentOptions } from './platform-source';
|
|
import { execYtDlp, parseJsonLines, parseSingleJson } from './yt-dlp';
|
|
|
|
/**
|
|
* Generic platform source — catch-all for any URL yt-dlp supports.
|
|
*
|
|
* Works with Vimeo, Twitch VODs, Bandcamp, Dailymotion, Twitter/X,
|
|
* Instagram, TikTok, Reddit, news sites with embedded video, blogs,
|
|
* and hundreds of other sites yt-dlp can extract from.
|
|
*
|
|
* Unlike YouTube/SoundCloud sources which use channel-level enumeration,
|
|
* the Generic source treats the channel URL as a playlist/page to scrape.
|
|
* Content discovery uses yt-dlp's built-in extractors with no platform-specific logic.
|
|
*/
|
|
export class GenericSource implements PlatformSource {
  /**
   * Resolve a URL to channel-like metadata.
   *
   * Runs yt-dlp once in flat-playlist / single-JSON mode and picks the first
   * available field for each property:
   *   - name:       channel → uploader → playlist_title → title → URL hostname
   *   - platformId: channel_id → uploader_id → playlist_id → id → the URL itself
   *   - url:        channel_url → uploader_url → webpage_url → the URL itself
   *
   * This method never rejects: if yt-dlp fails or its output cannot be parsed,
   * the failure is logged and a minimal record derived from the URL is returned.
   *
   * @param url - Page, playlist or channel URL to inspect.
   * @returns Channel-like metadata with `platform` set to `'generic'`.
   */
  async resolveChannel(url: string): Promise<PlatformSourceMetadata> {
    try {
      const result = await execYtDlp(
        [
          '--dump-single-json',
          // NOTE(review): item index 0 presumably selects no entries so only the
          // container metadata is dumped — confirm against yt-dlp documentation.
          '--playlist-items', '0',
          '--flat-playlist',
          url,
        ],
        { timeout: 30_000 }
      );

      const data = parseSingleJson(result.stdout) as Record<string, unknown>;

      // yt-dlp returns various shapes depending on the site
      const name = data.channel
        ?? data.uploader
        ?? data.playlist_title
        ?? data.title
        ?? new URL(url).hostname;

      const platformId = data.channel_id
        ?? data.uploader_id
        ?? data.playlist_id
        ?? data.id
        ?? url;

      const channelUrl = data.channel_url
        ?? data.uploader_url
        ?? data.webpage_url
        ?? url;

      // Best thumbnail: last entry of `thumbnails`, validated so a malformed
      // entry cannot leak `undefined`; then the single `thumbnail` field.
      const thumbnails = Array.isArray(data.thumbnails)
        ? (data.thumbnails as Array<{ url?: unknown } | null>)
        : [];
      const lastThumbnailUrl = thumbnails[thumbnails.length - 1]?.url;
      let imageUrl: string | null = null;
      if (typeof lastThumbnailUrl === 'string' && lastThumbnailUrl !== '') {
        imageUrl = lastThumbnailUrl;
      } else if (typeof data.thumbnail === 'string' && data.thumbnail !== '') {
        imageUrl = data.thumbnail;
      }

      return {
        name: String(name),
        platformId: String(platformId),
        imageUrl,
        url: String(channelUrl),
        platform: 'generic' as const,
        description: data.description ? String(data.description) : null,
      };
    } catch (err) {
      // Best-effort by design, but leave a trace so extraction problems are
      // diagnosable instead of silently degrading to the hostname fallback.
      console.warn(
        `[generic] Channel resolution failed for ${url}, using URL fallback: ${err instanceof Error ? err.message : err}`
      );

      // Fallback: use URL domain as name, URL as identifier
      let hostname = 'Unknown';
      try {
        hostname = new URL(url).hostname;
      } catch {
        // Not a parseable URL — keep the placeholder name.
      }

      return {
        name: hostname,
        platformId: url,
        imageUrl: null,
        url,
        platform: 'generic' as const,
      };
    }
  }

  /**
   * Fetch content from a generic URL.
   *
   * Phase 1 (discovery): enumerates up to `limit` items of `channel.url` with
   * `--flat-playlist` and maps each JSON line through `mapEntry`.
   *
   * Phase 2 (enrichment, skipped when `discoveryOnly` is set): for every
   * discovered item whose id is not in `existingIds`, runs yt-dlp on the item
   * URL to obtain full metadata. Calls are sequential, separated by
   * `rateLimitDelay` milliseconds, and stop early once `signal` is aborted.
   * An item whose enrichment fails keeps its discovery metadata.
   *
   * @param channel - Channel whose `url` is enumerated.
   * @param options - Optional `limit` (default 50), `discoveryOnly` (default
   *   false), `existingIds` (default empty), `rateLimitDelay` in ms (default
   *   2000) and an abort `signal`.
   * @returns Discovered items in discovery order, with enriched metadata
   *   substituted where enrichment succeeded.
   * @throws Whatever `execYtDlp` / `parseJsonLines` throw during discovery;
   *   only per-item enrichment failures are caught.
   */
  async fetchRecentContent(
    channel: Channel,
    options?: FetchRecentContentOptions
  ): Promise<PlatformContentMetadata[]> {
    const limit = options?.limit ?? 50;
    const discoveryOnly = options?.discoveryOnly ?? false;
    const existingIds = options?.existingIds ?? new Set<string>();
    const rateLimitDelay = options?.rateLimitDelay ?? 2000;
    const signal = options?.signal;
    const isAborted = (): boolean => signal?.aborted === true;

    // Discovery: enumerate items from the URL.
    // Base timeout of 60s plus 30s for every (started) block of 500 items.
    const discoveryTimeout = 60_000 + Math.ceil(limit / 500) * 30_000;
    const flatResult = await execYtDlp(
      [
        '--flat-playlist',
        '--dump-json',
        '--playlist-items', `1:${limit}`,
        channel.url,
      ],
      { timeout: discoveryTimeout }
    );

    const flatEntries = parseJsonLines(flatResult.stdout) as Record<string, unknown>[];
    const discoveredItems = flatEntries.map((entry) => mapEntry(entry));

    if (discoveryOnly) {
      return discoveredItems;
    }

    // Enrichment: fetch full metadata for new items only
    const newItems = discoveredItems.filter(
      (item) => !existingIds.has(item.platformContentId)
    );

    if (newItems.length === 0) return discoveredItems;

    console.log(
      `[generic] Enriching ${newItems.length} new items (${discoveredItems.length - newItems.length} already known)`
    );

    const enrichedMap = new Map<string, PlatformContentMetadata>();

    for (const [i, item] of newItems.entries()) {
      if (isAborted()) {
        console.log(`[generic] Enrichment aborted after ${i} items`);
        break;
      }

      if (i > 0 && rateLimitDelay > 0) {
        await sleep(rateLimitDelay);
        // The delay is not interruptible, so re-check afterwards rather than
        // spawning one more yt-dlp process for a run that was already aborted.
        if (isAborted()) {
          console.log(`[generic] Enrichment aborted after ${i} items`);
          break;
        }
      }

      try {
        const enrichResult = await execYtDlp(
          ['--dump-json', '--no-playlist', item.url],
          { timeout: 30_000 }
        );
        const enrichedEntry = parseSingleJson(enrichResult.stdout) as Record<string, unknown>;
        // Keep the discovery id: `existingIds` is matched against discovery ids
        // above, so an id that changed during full extraction would make the
        // item look new (and be re-enriched) on every later run.
        enrichedMap.set(item.platformContentId, {
          ...mapEntry(enrichedEntry),
          platformContentId: item.platformContentId,
        });
      } catch (err) {
        console.warn(
          `[generic] Enrichment failed for ${item.platformContentId}: ${err instanceof Error ? err.message : err}`
        );
      }
    }

    // Preserve discovery order; fall back to flat metadata where enrichment
    // was skipped, aborted or failed.
    return discoveredItems.map(
      (item) => enrichedMap.get(item.platformContentId) ?? item
    );
  }
}
|
|
|
|
// ── Helpers ──
|
|
|
|
/**
 * Convert one yt-dlp JSON entry (flat-playlist or full extraction) into the
 * platform-neutral content record.
 *
 * Field selection:
 *   - platformContentId: `id` → `url` → `''`
 *   - title:             `title` → `fulltitle` → `'Untitled'`
 *   - url:               `webpage_url` → `url` → `original_url` → `''`
 *   - contentType:       `'livestream'` when `live_status` is `is_live` or
 *                        `is_upcoming`; otherwise `'audio'` when the entry looks
 *                        audio-only; otherwise `'video'`
 *   - duration:          rounded to whole units when a finite number, else `null`
 *   - thumbnailUrl:      last entry of `thumbnails` → `thumbnail` → `null`
 *   - publishedAt:       `upload_date` (YYYYMMDD) as midnight-UTC ISO string,
 *                        or `null` when absent or not a real date
 *
 * @param entry - Parsed yt-dlp JSON object; no field is required.
 * @returns The mapped content metadata.
 */
function mapEntry(entry: Record<string, unknown>): PlatformContentMetadata {
  const id = String(entry.id ?? entry.url ?? '');
  const title = String(entry.title ?? entry.fulltitle ?? 'Untitled');
  const url = String(entry.webpage_url ?? entry.url ?? entry.original_url ?? '');

  // Content type detection
  const liveStatus = entry.live_status;
  const isLive = liveStatus === 'is_live' || liveStatus === 'is_upcoming';

  // Test the extension against the full URL (original behaviour) and against
  // the URL with query string / fragment removed, so that direct media links
  // such as "track.mp3?token=…" are still recognised as audio.
  const audioExtension = /\.(mp3|flac|wav|ogg|opus|m4a|aac)$/i;
  const urlWithoutQuery = url.split(/[?#]/, 1)[0] ?? '';
  const isAudio = entry._type === 'audio'
    || (entry.vcodec === 'none' && entry.acodec !== 'none')
    || audioExtension.test(url)
    || audioExtension.test(urlWithoutQuery);

  // Live status wins over the audio heuristics.
  const contentType: ContentType = isLive ? 'livestream' : isAudio ? 'audio' : 'video';

  // Duration — reject NaN/Infinity so they never reach storage as a "number".
  const duration = typeof entry.duration === 'number' && Number.isFinite(entry.duration)
    ? Math.round(entry.duration)
    : null;

  // Thumbnail — the last list entry is taken as the best one; a malformed or
  // url-less entry falls through to the single `thumbnail` field.
  const thumbnails: unknown[] = Array.isArray(entry.thumbnails) ? entry.thumbnails : [];
  const lastThumbnail = thumbnails[thumbnails.length - 1];
  const lastThumbnailUrl = typeof lastThumbnail === 'object' && lastThumbnail !== null
    ? (lastThumbnail as { url?: unknown }).url
    : undefined;
  let thumbnailUrl: string | null = null;
  if (typeof lastThumbnailUrl === 'string' && lastThumbnailUrl !== '') {
    thumbnailUrl = lastThumbnailUrl;
  } else if (typeof entry.thumbnail === 'string' && entry.thumbnail !== '') {
    thumbnailUrl = entry.thumbnail;
  }

  // Published date
  let publishedAt: string | null = null;
  const uploadDate = entry.upload_date;
  if (typeof uploadDate === 'string' && /^\d{8}$/.test(uploadDate)) {
    const iso = `${uploadDate.slice(0, 4)}-${uploadDate.slice(4, 6)}-${uploadDate.slice(6, 8)}T00:00:00Z`;
    // Eight digits are not necessarily a date (e.g. month 13); only keep
    // values the runtime can actually parse.
    if (!Number.isNaN(Date.parse(iso))) {
      publishedAt = iso;
    }
  }

  return { platformContentId: id, title, url, contentType, duration, thumbnailUrl, publishedAt };
}
|
|
|
|
/**
 * Pause for a fixed delay.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|