// tubearr/src/sources/generic.ts

import type { Channel, PlatformSourceMetadata, PlatformContentMetadata, ContentType } from '../types/index';
import type { PlatformSource, FetchRecentContentOptions } from './platform-source';
import { execYtDlp, parseJsonLines, parseSingleJson } from './yt-dlp';

/**
 * Generic platform source — catch-all for any URL yt-dlp supports.
 *
 * Works with Vimeo, Twitch VODs, Bandcamp, Dailymotion, Twitter/X,
 * Instagram, TikTok, Reddit, news sites with embedded video, blogs,
 * and hundreds of other sites yt-dlp can extract from.
 *
 * Unlike YouTube/SoundCloud sources which use channel-level enumeration,
 * the Generic source treats the channel URL as a playlist/page to scrape.
 * Content discovery uses yt-dlp's built-in extractors with no platform-specific logic.
 */
export class GenericSource implements PlatformSource {

  /**
   * Resolve a URL to channel-like metadata.
   *
   * Runs yt-dlp once in flat-playlist / single-JSON mode and picks the first
   * available field for each of name, platform id and canonical URL. The name
   * falls back to the URL's hostname when yt-dlp reports no usable field.
   *
   * This method never rejects: if yt-dlp fails, its output cannot be parsed,
   * or the URL is malformed, a minimal record built from the URL itself is
   * returned (hostname — or `'Unknown'` — as the name, the URL as the id).
   * The failure is logged so the degraded result can be diagnosed.
   *
   * @param url - Page, playlist or uploader URL supplied by the user.
   * @returns Channel-like metadata with `platform` fixed to `'generic'`.
   */
  async resolveChannel(url: string): Promise<PlatformSourceMetadata> {
    try {
      const result = await execYtDlp(
        [
          '--dump-single-json',
          // NOTE(review): item index 0 presumably asks for container metadata
          // without any entries — confirm against yt-dlp's documentation.
          '--playlist-items', '0',
          '--flat-playlist',
          url,
        ],
        { timeout: 30_000 }
      );

      const data = parseSingleJson(result.stdout) as Record<string, unknown>;

      // yt-dlp returns various shapes depending on the site, so try the
      // most specific field first and fall through to broader ones.
      const name = data.channel
        ?? data.uploader
        ?? data.playlist_title
        ?? data.title
        ?? new URL(url).hostname; // throws on a malformed URL → handled by the catch below

      const platformId = data.channel_id
        ?? data.uploader_id
        ?? data.playlist_id
        ?? data.id
        ?? url;

      const channelUrl = data.channel_url
        ?? data.uploader_url
        ?? data.webpage_url
        ?? url;

      // The last thumbnail in the list is used as the channel image.
      const thumbnails = data.thumbnails as Array<{ url: string; width?: number }> | undefined;
      const imageUrl = thumbnails?.length
        ? thumbnails[thumbnails.length - 1].url
        : null;

      return {
        name: String(name),
        platformId: String(platformId),
        imageUrl,
        url: String(channelUrl),
        platform: 'generic' as const,
        description: data.description ? String(data.description) : null,
      };
    } catch (err) {
      // Best-effort fallback, but do not hide the reason: a silent failure here
      // makes every channel quietly degrade to a hostname-only record.
      console.warn(
        `[generic] Channel resolution failed for ${url}, using URL fallback: ${err instanceof Error ? err.message : err}`
      );

      // Fallback: use URL domain as name, URL as identifier
      const hostname = (() => {
        try { return new URL(url).hostname; } catch { return 'Unknown'; }
      })();
      return {
        name: hostname,
        platformId: url,
        imageUrl: null,
        url,
        platform: 'generic' as const,
      };
    }
  }

  /**
   * Fetch content from a generic URL.
   *
   * Phase 1 (discovery) enumerates up to `limit` items of `channel.url` with
   * `--flat-playlist`. Phase 2 (enrichment) re-queries each item whose id is
   * not in `existingIds`, one at a time, waiting `rateLimitDelay` ms between
   * calls. Items whose enrichment fails keep their discovery-phase metadata.
   *
   * Option defaults: `limit` 50, `discoveryOnly` false, `existingIds` empty,
   * `rateLimitDelay` 2000 ms.
   *
   * @param channel - Channel whose `url` is enumerated.
   * @param options - Optional limits, known ids, rate limiting and abort signal.
   * @returns One entry per discovered item, in discovery order; enriched
   *   metadata replaces the flat entry where enrichment succeeded.
   * @throws Whatever `execYtDlp` / `parseJsonLines` throw during discovery;
   *   per-item enrichment errors are logged and swallowed.
   */
  async fetchRecentContent(
    channel: Channel,
    options?: FetchRecentContentOptions
  ): Promise<PlatformContentMetadata[]> {
    const limit = options?.limit ?? 50;
    const discoveryOnly = options?.discoveryOnly ?? false;
    const existingIds = options?.existingIds ?? new Set<string>();
    const rateLimitDelay = options?.rateLimitDelay ?? 2000;
    const signal = options?.signal;

    // Read the flag through a function so each call observes the signal's current state.
    const isAborted = (): boolean => signal?.aborted === true;

    // Discovery: enumerate items from the URL.
    // Timeout is 60s plus 30s for every (started) block of 500 requested items.
    const discoveryTimeout = 60_000 + Math.ceil(limit / 500) * 30_000;
    const flatResult = await execYtDlp(
      [
        '--flat-playlist',
        '--dump-json',
        '--playlist-items', `1:${limit}`,
        channel.url,
      ],
      { timeout: discoveryTimeout }
    );

    const flatEntries = parseJsonLines(flatResult.stdout) as Record<string, unknown>[];
    const discoveredItems = flatEntries.map((entry) => mapEntry(entry));

    if (discoveryOnly) {
      return discoveredItems;
    }

    // Enrichment: fetch full metadata for new items only
    const newItems = discoveredItems.filter(
      (item) => !existingIds.has(item.platformContentId)
    );

    if (newItems.length === 0) return discoveredItems;

    console.log(
      `[generic] Enriching ${newItems.length} new items (${discoveredItems.length - newItems.length} already known)`
    );

    const enrichedMap = new Map<string, PlatformContentMetadata>();

    for (let i = 0; i < newItems.length; i++) {
      if (isAborted()) {
        console.log(`[generic] Enrichment aborted after ${i} items`);
        break;
      }

      const item = newItems[i];
      if (i > 0 && rateLimitDelay > 0) {
        await sleep(rateLimitDelay);

        // The abort may have arrived while we were sleeping; without this
        // re-check one more yt-dlp process would be started after cancellation.
        if (isAborted()) {
          console.log(`[generic] Enrichment aborted after ${i} items`);
          break;
        }
      }

      try {
        const enrichResult = await execYtDlp(
          ['--dump-json', '--no-playlist', item.url],
          { timeout: 30_000 }
        );
        const enrichedEntry = parseSingleJson(enrichResult.stdout) as Record<string, unknown>;
        enrichedMap.set(item.platformContentId, mapEntry(enrichedEntry));
      } catch (err) {
        // A single failing item must not abort the whole scan; keep the flat entry.
        console.warn(
          `[generic] Enrichment failed for ${item.platformContentId}: ${err instanceof Error ? err.message : err}`
        );
      }
    }

    // Preserve discovery order, swapping in enriched metadata where available.
    return discoveredItems.map((item) => {
      const enriched = enrichedMap.get(item.platformContentId);
      return enriched ?? item;
    });
  }
}

// ── Helpers ──

/**
 * Convert one raw yt-dlp JSON entry into the platform-neutral content shape.
 *
 * Field selection:
 * - id: `id`, else `url`, else `''`.
 * - title: `title`, else `fulltitle`, else `'Untitled'`.
 * - url: `webpage_url`, else `url`, else `original_url`, else `''`.
 * - contentType: `'livestream'` when `live_status` is `is_live` or
 *   `is_upcoming`; otherwise `'audio'` when the entry is typed as audio, has
 *   an audio codec but no video codec, or its URL ends in a known audio
 *   extension (query string / fragment ignored); otherwise `'video'`.
 * - duration: rounded to whole units when it is a finite number, else `null`.
 * - thumbnailUrl: `url` of the last `thumbnails` element when that is a
 *   string, else `thumbnail` when that is a string, else `null`.
 * - publishedAt: `upload_date` (`YYYYMMDD`) rendered as midnight UTC, else `null`.
 *
 * @param entry - Parsed yt-dlp JSON object (flat-playlist or full metadata).
 * @returns Normalized content metadata; never throws on missing fields.
 */
function mapEntry(entry: Record<string, unknown>): PlatformContentMetadata {
  const id = String(entry.id ?? entry.url ?? '');
  const title = String(entry.title ?? entry.fulltitle ?? 'Untitled');
  const url = String(entry.webpage_url ?? entry.url ?? entry.original_url ?? '');

  // Content type detection
  const liveStatus = entry.live_status;
  const isLive = liveStatus === 'is_live' || liveStatus === 'is_upcoming';

  // Direct media links frequently carry a query string or fragment
  // (e.g. signed CDN URLs), so also test the URL with those stripped.
  // The full URL is still tested so every previously matching URL keeps matching.
  const audioExtension = /\.(mp3|flac|wav|ogg|opus|m4a|aac)$/i;
  const urlWithoutQuery = url.split(/[?#]/, 1)[0] ?? url;
  const isAudio = entry._type === 'audio'
    || (entry.vcodec === 'none' && entry.acodec !== 'none')
    || audioExtension.test(url)
    || audioExtension.test(urlWithoutQuery);

  let contentType: ContentType = 'video';
  if (isLive) contentType = 'livestream';
  else if (isAudio) contentType = 'audio';

  // Duration — only finite numbers are meaningful.
  const duration = typeof entry.duration === 'number' && Number.isFinite(entry.duration)
    ? Math.round(entry.duration)
    : null;

  // Thumbnail — last list element, validated; otherwise the single `thumbnail`
  // field. Guarantees the result is a string or null, never undefined.
  const thumbnailList: unknown[] = Array.isArray(entry.thumbnails) ? entry.thumbnails : [];
  const lastThumbnail = thumbnailList.length > 0
    ? thumbnailList[thumbnailList.length - 1]
    : undefined;
  let thumbnailUrl: string | null = null;
  if (typeof lastThumbnail === 'object' && lastThumbnail !== null) {
    const candidate = (lastThumbnail as Record<string, unknown>).url;
    if (typeof candidate === 'string') thumbnailUrl = candidate;
  }
  if (thumbnailUrl === null && typeof entry.thumbnail === 'string') {
    thumbnailUrl = entry.thumbnail;
  }

  // Published date — yt-dlp reports YYYYMMDD; anything else is ignored.
  let publishedAt: string | null = null;
  const uploadDate = entry.upload_date;
  if (typeof uploadDate === 'string' && /^\d{8}$/.test(uploadDate)) {
    publishedAt = `${uploadDate.slice(0, 4)}-${uploadDate.slice(4, 6)}-${uploadDate.slice(6, 8)}T00:00:00Z`;
  }

  return { platformContentId: id, title, url, contentType, duration, thumbnailUrl, publishedAt };
}

/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(() => {
      done();
    }, ms);
  });
}