From 125983588d95c548f9096af12e9623db85b64168 Mon Sep 17 00:00:00 2001 From: jlightner Date: Sat, 4 Apr 2026 11:12:19 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Created=20ASS=20subtitle=20generator=20?= =?UTF-8?q?with=20karaoke=20word-by-word=20highligh=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/pipeline/caption_generator.py" - "backend/pipeline/shorts_generator.py" - "backend/pipeline/stages.py" - "backend/models.py" - "alembic/versions/027_add_captions_enabled.py" - "backend/pipeline/test_caption_generator.py" GSD-Task: S04/T01 --- .gsd/milestones/M024/M024-ROADMAP.md | 2 +- .../milestones/M024/slices/S03/S03-SUMMARY.md | 95 +++++++++++ .gsd/milestones/M024/slices/S03/S03-UAT.md | 52 ++++++ .../M024/slices/S03/tasks/T02-VERIFY.json | 16 ++ .gsd/milestones/M024/slices/S04/S04-PLAN.md | 71 +++++++- .../M024/slices/S04/S04-RESEARCH.md | 108 ++++++++++++ .../M024/slices/S04/tasks/T01-PLAN.md | 50 ++++++ .../M024/slices/S04/tasks/T01-SUMMARY.md | 87 ++++++++++ .../M024/slices/S04/tasks/T02-PLAN.md | 48 ++++++ .../M024/slices/S04/tasks/T03-PLAN.md | 47 ++++++ alembic/versions/027_add_captions_enabled.py | 30 ++++ backend/models.py | 4 + backend/pipeline/caption_generator.py | 155 +++++++++++++++++ backend/pipeline/shorts_generator.py | 15 +- backend/pipeline/stages.py | 62 ++++++- backend/pipeline/test_caption_generator.py | 159 ++++++++++++++++++ 16 files changed, 997 insertions(+), 4 deletions(-) create mode 100644 .gsd/milestones/M024/slices/S03/S03-SUMMARY.md create mode 100644 .gsd/milestones/M024/slices/S03/S03-UAT.md create mode 100644 .gsd/milestones/M024/slices/S03/tasks/T02-VERIFY.json create mode 100644 .gsd/milestones/M024/slices/S04/S04-RESEARCH.md create mode 100644 .gsd/milestones/M024/slices/S04/tasks/T01-PLAN.md create mode 100644 .gsd/milestones/M024/slices/S04/tasks/T01-SUMMARY.md create mode 100644 .gsd/milestones/M024/slices/S04/tasks/T02-PLAN.md create mode 100644 
.gsd/milestones/M024/slices/S04/tasks/T03-PLAN.md create mode 100644 alembic/versions/027_add_captions_enabled.py create mode 100644 backend/pipeline/caption_generator.py create mode 100644 backend/pipeline/test_caption_generator.py diff --git a/.gsd/milestones/M024/M024-ROADMAP.md b/.gsd/milestones/M024/M024-ROADMAP.md index 49536c7..9a7efba 100644 --- a/.gsd/milestones/M024/M024-ROADMAP.md +++ b/.gsd/milestones/M024/M024-ROADMAP.md @@ -8,7 +8,7 @@ Shorts pipeline goes end-to-end with captioning and templates. Player gets key m |----|-------|------|---------|------|------------| | S01 | [A] Shorts Publishing Flow | medium | — | ✅ | Creator approves a short → it renders → gets a shareable URL and embed code | | S02 | [A] Key Moment Pins on Player Timeline | low | — | ✅ | Key technique moments appear as clickable pins on the player timeline | -| S03 | [A] Embed Support (iframe Snippet) | low | — | ⬜ | Creators can copy an iframe embed snippet to put the player on their own site | +| S03 | [A] Embed Support (iframe Snippet) | low | — | ✅ | Creators can copy an iframe embed snippet to put the player on their own site | | S04 | [B] Auto-Captioning + Template System | medium | — | ⬜ | Shorts have Whisper-generated animated subtitles and creator-configurable intro/outro cards | | S05 | [B] Citation UX Improvements | low | — | ⬜ | Chat citations show timestamp links that seek the player and source cards with video thumbnails | | S06 | Forgejo KB Update — Shorts, Embed, Citations | low | S01, S02, S03, S04, S05 | ⬜ | Forgejo wiki updated with shorts pipeline, embed system, citation architecture | diff --git a/.gsd/milestones/M024/slices/S03/S03-SUMMARY.md b/.gsd/milestones/M024/slices/S03/S03-SUMMARY.md new file mode 100644 index 0000000..12b77a5 --- /dev/null +++ b/.gsd/milestones/M024/slices/S03/S03-SUMMARY.md @@ -0,0 +1,95 @@ +--- +id: S03 +parent: M024 +milestone: M024 +provides: + - EmbedPlayer page at /embed/:videoId + - Shared copyToClipboard utility + - Copy Embed 
Code button on WatchPage +requires: + [] +affects: + - S06 +key_files: + - frontend/src/utils/clipboard.ts + - frontend/src/pages/EmbedPlayer.tsx + - frontend/src/pages/EmbedPlayer.module.css + - frontend/src/pages/ShortPlayer.tsx + - frontend/src/App.tsx + - frontend/src/pages/WatchPage.tsx + - frontend/src/App.css +key_decisions: + - Embed route rendered at top-level Routes before AppShell fallback for chrome-free iframe rendering + - Audio-only embeds use height 120 vs 405 for video in generated snippet + - Branding link opens origin in new tab with noopener for iframe safety + - copyToClipboard extracted to shared utility for reuse across ShortPlayer and WatchPage +patterns_established: + - Top-level Routes in App.tsx for chrome-free pages that skip AppShell (header/nav/footer) +observability_surfaces: + - none +drill_down_paths: + - .gsd/milestones/M024/slices/S03/tasks/T01-SUMMARY.md + - .gsd/milestones/M024/slices/S03/tasks/T02-SUMMARY.md +duration: "" +verification_result: passed +completed_at: 2026-04-04T11:00:25.948Z +blocker_discovered: false +--- + +# S03: [A] Embed Support (iframe Snippet) + +**Creators can copy an iframe embed snippet from WatchPage, and /embed/:videoId renders a chrome-free player suitable for iframe embedding.** + +## What Happened + +Built the embed support feature in two tasks. T01 extracted copyToClipboard from ShortPlayer into a shared utility at `frontend/src/utils/clipboard.ts`, then created the EmbedPlayer page (`EmbedPlayer.tsx` + `EmbedPlayer.module.css`) — a full-viewport dark-background player that fetches video detail and renders either VideoPlayer or AudioWaveform based on content type, with a small "Powered by Chrysopedia" branding link. + +T02 wired the `/embed/:videoId` route at the top level of App.tsx's Routes (before the AppShell catch-all), so embed pages render without header/nav/footer. 
Added a "Copy Embed Code" button to WatchPage's header that generates an iframe snippet with audio-aware height (120px for audio-only, 405px for video) and shows 2-second "Copied!" feedback. The EmbedPlayer chunk is code-split via React.lazy. + +Both `tsc --noEmit` and `npm run build` pass cleanly. The embed route is isolated from the app shell, the clipboard utility is shared, and the iframe snippet includes correct dimensions per content type. + +## Verification + +- `cd frontend && npx tsc --noEmit` — exit 0, zero type errors +- `cd frontend && npm run build` — exit 0, EmbedPlayer code-split into own chunk, 190 modules transformed +- Files confirmed: clipboard.ts, EmbedPlayer.tsx, EmbedPlayer.module.css all present +- App.tsx has /embed/:videoId route before AppShell catch-all (line 233) +- WatchPage.tsx generates iframe snippet with audio-aware height (line 35) + +## Requirements Advanced + +None. + +## Requirements Validated + +None. + +## New Requirements Surfaced + +None. + +## Requirements Invalidated or Re-scoped + +None. + +## Deviations + +Added .watch-page__header-top flex container in WatchPage for title/button layout — minor structural addition not in plan. + +## Known Limitations + +None. + +## Follow-ups + +None. 
+ +## Files Created/Modified + +- `frontend/src/utils/clipboard.ts` — New shared copyToClipboard utility extracted from ShortPlayer +- `frontend/src/pages/EmbedPlayer.tsx` — New chrome-free embed player page with video/audio support +- `frontend/src/pages/EmbedPlayer.module.css` — Full-viewport dark layout styles for embed player +- `frontend/src/pages/ShortPlayer.tsx` — Updated to import copyToClipboard from shared utility +- `frontend/src/App.tsx` — Added /embed/:videoId route before AppShell catch-all +- `frontend/src/pages/WatchPage.tsx` — Added Copy Embed Code button with audio-aware iframe snippet +- `frontend/src/App.css` — Added styles for embed copy button diff --git a/.gsd/milestones/M024/slices/S03/S03-UAT.md b/.gsd/milestones/M024/slices/S03/S03-UAT.md new file mode 100644 index 0000000..181731d --- /dev/null +++ b/.gsd/milestones/M024/slices/S03/S03-UAT.md @@ -0,0 +1,52 @@ +# S03: [A] Embed Support (iframe Snippet) — UAT + +**Milestone:** M024 +**Written:** 2026-04-04T11:00:25.948Z + +## UAT: Embed Support (iframe Snippet) + +### Preconditions +- Chrysopedia frontend running (ub01:8096 or local dev server) +- At least one video-type and one audio-only source video in the database + +### Test 1: Copy Embed Code — Video Content +1. Navigate to a WatchPage for a video that has `video_url` (e.g., `/watch/{videoId}`) +2. Locate the "Copy Embed Code" button in the page header +3. Click the button +4. **Expected:** Button text changes to "Copied!" for ~2 seconds, then reverts to "Copy Embed Code" +5. Paste clipboard contents into a text editor +6. **Expected:** Clipboard contains an `<iframe>` snippet whose `src` points at `/embed/{videoId}` and whose height attribute is `height="405"` + +### Test 2: Copy Embed Code — Audio-Only Content +1. Navigate to a WatchPage for an audio-only source video (no `video_url`) +2. Click "Copy Embed Code" +3. Paste clipboard contents +4. **Expected:** iframe height is `120` (not 405): the snippet contains `height="120"` + +### Test 3: Embed Route — Chrome-Free Video +1. Navigate directly to `/embed/{videoId}` for a video source +2. 
**Expected:** Full-viewport dark background, video player fills the space, no site header/nav/footer +3. **Expected:** Small "Powered by Chrysopedia" link at the bottom +4. **Expected:** Player controls (play/pause, seek, volume) are functional + +### Test 4: Embed Route — Chrome-Free Audio +1. Navigate to `/embed/{videoId}` for an audio-only source +2. **Expected:** Audio waveform or audio player renders instead of video player +3. **Expected:** Same chrome-free layout with branding link + +### Test 5: Embed Route with Start Time +1. Navigate to `/embed/{videoId}?t=30` +2. **Expected:** Player starts at or seeks to the 30-second mark + +### Test 6: Embed Route — Invalid Video ID +1. Navigate to `/embed/nonexistent-id` +2. **Expected:** Error state displayed (not a blank page or crash) + +### Test 7: iframe Integration +1. Create a local HTML file with the copied iframe snippet +2. Open it in a browser +3. **Expected:** Chrysopedia player loads inside the iframe, video plays, no app chrome visible + +### Edge Cases +- **Rapid clicks on Copy Embed Code:** Should not stack timeouts or cause flickering — button stays "Copied!" 
and timer resets +- **Narrow viewport in iframe:** Embed player should be responsive, scaling to container width diff --git a/.gsd/milestones/M024/slices/S03/tasks/T02-VERIFY.json b/.gsd/milestones/M024/slices/S03/tasks/T02-VERIFY.json new file mode 100644 index 0000000..ca92f5c --- /dev/null +++ b/.gsd/milestones/M024/slices/S03/tasks/T02-VERIFY.json @@ -0,0 +1,16 @@ +{ + "schemaVersion": 1, + "taskId": "T02", + "unitId": "M024/S03/T02", + "timestamp": 1775300354664, + "passed": true, + "discoverySource": "task-plan", + "checks": [ + { + "command": "cd frontend", + "exitCode": 0, + "durationMs": 14, + "verdict": "pass" + } + ] +} diff --git a/.gsd/milestones/M024/slices/S04/S04-PLAN.md b/.gsd/milestones/M024/slices/S04/S04-PLAN.md index 731e170..f2a9b2c 100644 --- a/.gsd/milestones/M024/slices/S04/S04-PLAN.md +++ b/.gsd/milestones/M024/slices/S04/S04-PLAN.md @@ -1,6 +1,75 @@ # S04: [B] Auto-Captioning + Template System -**Goal:** Add auto-captioning and template system to shorts pipeline +**Goal:** Shorts have Whisper-generated animated subtitles and creator-configurable intro/outro cards **Demo:** After this: Shorts have Whisper-generated animated subtitles and creator-configurable intro/outro cards ## Tasks +- [x] **T01: Created ASS subtitle generator with karaoke word-by-word highlighting and wired it into the shorts generation stage with non-blocking caption enrichment** — Create `caption_generator.py` that converts word-level timings into ASS (Advanced SubStation Alpha) subtitle format with word-by-word karaoke highlighting. Modify `shorts_generator.py` to accept an optional ASS file path and chain the `ass=` filter into the ffmpeg `-vf` string. Wire transcript loading and caption generation into `stage_generate_shorts` in `stages.py`. Add `captions_enabled` boolean column to `GeneratedShort` model. Write unit tests for caption generation. + +Steps: +1. Read `backend/pipeline/highlight_scorer.py` for `extract_word_timings` signature and output format. 
Read `backend/pipeline/shorts_generator.py` for `extract_clip` and `PRESETS`. Read `backend/pipeline/stages.py:2869-2990` for `stage_generate_shorts` flow. +2. Create `backend/pipeline/caption_generator.py`: + - `generate_ass_captions(word_timings: list[dict], clip_start: float, style_config: dict | None = None) -> str` — returns ASS file content as string. Each word gets a `Dialogue` line. Use `{\k}` karaoke tags for word-by-word highlight timing. Style: bold white text, centered bottom 15%, black outline. Offset all word times by `-clip_start` to make them clip-relative. + - `write_ass_file(ass_content: str, output_path: Path) -> Path` — writes to disk, returns path. +3. Modify `extract_clip()` in `shorts_generator.py`: add optional `ass_path: Path | None = None` parameter. When provided, append `,ass={ass_path}` to the `vf_filter` string before passing to ffmpeg. Ensure the ASS filter comes after scale/pad filters. +4. Add `captions_enabled: Mapped[bool] = mapped_column(default=False, server_default='false')` to `GeneratedShort` in `models.py`. +5. Create Alembic migration `027_add_captions_enabled.py` for the new column. +6. Modify `stage_generate_shorts` in `stages.py`: + - After loading the highlight, load `source_video.transcript_path` and parse transcript JSON (reuse the pattern from line ~2465). + - Call `extract_word_timings(transcript_data, clip_start, clip_end)` to get word timings for the clip window. + - If word timings are non-empty, call `generate_ass_captions()` and `write_ass_file()` to a temp path. + - Pass the ASS path to `extract_clip()`. Set `short.captions_enabled = True`. + - If word timings are empty, log a warning and proceed without captions. +7. 
Create `backend/pipeline/test_caption_generator.py` with tests: + - Valid word timings → correct ASS output with proper timing math + - Empty word timings → empty ASS (or raise, depending on design) + - Clip offset applied correctly (word at t=10.5 with clip_start=10.0 becomes t=0.5) + - ASS format structure (header, style block, dialogue lines) +8. Run tests: `cd backend && python -m pytest pipeline/test_caption_generator.py -v` + - Estimate: 2h + - Files: backend/pipeline/caption_generator.py, backend/pipeline/shorts_generator.py, backend/pipeline/stages.py, backend/models.py, alembic/versions/027_add_captions_enabled.py, backend/pipeline/test_caption_generator.py + - Verify: cd backend && python -m pytest pipeline/test_caption_generator.py -v && python -c "from pipeline.caption_generator import generate_ass_captions; print('import ok')" +- [ ] **T02: Build card renderer and concat pipeline for intro/outro templates** — Create `card_renderer.py` that generates intro/outro card video segments using ffmpeg lavfi (color + drawtext). Add `shorts_template` JSONB column to Creator model. Implement ffmpeg concat demuxer logic to assemble intro + main clip + outro into final short. Wire into `stage_generate_shorts`. Write unit tests for card renderer. + +Steps: +1. Read T01 outputs: `backend/pipeline/caption_generator.py`, modified `shorts_generator.py` and `stages.py`. +2. Add `shorts_template: Mapped[dict | None] = mapped_column(JSONB, nullable=True)` to `Creator` model in `models.py`. Create Alembic migration `028_add_shorts_template.py`. +3. Create `backend/pipeline/card_renderer.py`: + - `render_card(text: str, duration_secs: float, width: int, height: int, accent_color: str = '#22d3ee', font_family: str = 'Inter') -> list[str]` — returns ffmpeg command args that generate a card mp4 from lavfi input (`color=c=black:s={w}x{h}:d={dur}` with `drawtext` for centered text, accent color underline/glow). 
+ - `render_card_to_file(text: str, duration_secs: float, width: int, height: int, output_path: Path, accent_color: str = '#22d3ee', font_family: str = 'Inter') -> Path` — executes the ffmpeg command, returns output path. + - `concat_segments(segments: list[Path], output_path: Path) -> Path` — writes a concat demuxer list file, runs `ffmpeg -f concat -safe 0 -i list.txt -c copy output.mp4`, returns output path. All segments must share codec settings. +4. Modify `shorts_generator.py`: add `extract_clip_with_template(input_path, output_path, start_secs, end_secs, vf_filter, ass_path=None, intro_path=None, outro_path=None) -> None` that extracts the main clip (with optional captions), then if intro/outro paths are provided, concats them via `concat_segments()`. +5. Modify `stage_generate_shorts` in `stages.py`: + - After loading highlight, also load `highlight.source_video.creator` to access `creator.shorts_template`. + - If `shorts_template` exists and `show_intro` is true, call `render_card_to_file()` for intro. Same for outro. + - Pass intro/outro paths to the clip extraction. Use codec-compatible settings (libx264, aac, same resolution from preset spec). + - If no template, proceed without cards (existing behavior preserved). +6. Create `backend/pipeline/test_card_renderer.py` with tests: + - `render_card()` returns valid ffmpeg command with correct dimensions and duration + - `concat_segments()` generates correct concat list file content + - Template config parsing handles missing/partial fields with defaults +7. 
Run tests: `cd backend && python -m pytest pipeline/test_card_renderer.py -v` + - Estimate: 2h + - Files: backend/pipeline/card_renderer.py, backend/pipeline/shorts_generator.py, backend/pipeline/stages.py, backend/models.py, alembic/versions/028_add_shorts_template.py, backend/pipeline/test_card_renderer.py + - Verify: cd backend && python -m pytest pipeline/test_card_renderer.py -v && python -c "from pipeline.card_renderer import render_card, concat_segments; print('import ok')" +- [ ] **T03: Template API endpoints and frontend template config UI** — Add REST endpoints for reading and updating creator shorts template config. Add template configuration UI to the HighlightQueue page — color picker, text inputs, duration controls, and intro/outro toggles. Add a caption toggle to the short generation flow. + +Steps: +1. Read T02 outputs to understand the shorts_template schema on the Creator model. +2. Create or extend `backend/routers/creators.py` with two endpoints: + - `GET /api/v1/admin/creators/{creator_id}/shorts-template` — returns current `shorts_template` JSONB or default config if null. + - `PUT /api/v1/admin/creators/{creator_id}/shorts-template` — validates and saves template config. Pydantic schema: `ShortsTemplateUpdate` with fields: intro_text (str, max 100), outro_text (str, max 100), accent_color (str, hex pattern), font_family (str), intro_duration_secs (float, 1.0-5.0), outro_duration_secs (float, 1.0-5.0), show_intro (bool), show_outro (bool). + - Both endpoints require admin auth. +3. Add `ShortsTemplateConfig` and `ShortsTemplateUpdate` Pydantic schemas to `backend/schemas.py`. +4. Create `frontend/src/api/templates.ts` — API client functions: `fetchShortsTemplate(creatorId)`, `updateShortsTemplate(creatorId, config)`. +5. Add template config UI to `frontend/src/pages/HighlightQueue.tsx`: + - A collapsible "Shorts Template" section in the sidebar or above the queue. 
+ - Fields: intro text, outro text, accent color (HTML color input), intro/outro duration sliders (1-5s), show intro/outro toggles. + - Save button that calls `updateShortsTemplate()`. + - Load current template on mount when a creator is selected. +6. Add a "Captions" toggle checkbox to the short generation trigger in HighlightQueue — when unchecked, pass `captions=false` query param to the generate endpoint. Update the `POST /api/v1/admin/highlights/{id}/generate-shorts` handler (in `backend/routers/creator_highlights.py` or similar) to accept and forward an optional `captions` param. +7. Verify frontend builds: `cd frontend && npm run build` +8. Verify API imports: `cd backend && python -c "from routers.creators import router; print('ok')"` + - Estimate: 2h + - Files: backend/routers/creators.py, backend/schemas.py, frontend/src/api/templates.ts, frontend/src/pages/HighlightQueue.tsx, frontend/src/pages/HighlightQueue.module.css, backend/routers/creator_highlights.py + - Verify: cd frontend && npm run build 2>&1 | tail -5 && cd ../backend && python -c "from routers.creators import router; print('ok')" diff --git a/.gsd/milestones/M024/slices/S04/S04-RESEARCH.md b/.gsd/milestones/M024/slices/S04/S04-RESEARCH.md new file mode 100644 index 0000000..4f8aeb3 --- /dev/null +++ b/.gsd/milestones/M024/slices/S04/S04-RESEARCH.md @@ -0,0 +1,108 @@ +# S04 Research: Auto-Captioning + Template System + +## Summary + +Add Whisper-derived animated subtitles to generated shorts and a creator-configurable intro/outro card system. Medium complexity — captioning uses established ASS format burned via ffmpeg (well-documented pattern), template system needs schema + UI + ffmpeg concat. + +## Recommendation + +Build captioning first (riskiest, validates the word-timing → ASS → ffmpeg pipeline), then template system (straightforward concat). Keep both features optional per-short — existing shorts should still generate without captions/templates if data is missing. 
+ +## Implementation Landscape + +### Current State + +1. **Shorts generation pipeline** (`backend/pipeline/stages.py:2869–3055`): `stage_generate_shorts` Celery task extracts clips via `extract_clip()` from `shorts_generator.py`, uploads to MinIO, creates `GeneratedShort` DB rows. Currently uses a simple `-vf` filter for scaling/padding only. + +2. **Word-level timing data**: Whisper transcripts stored on disk at `settings.transcript_storage_path` contain per-word `{word, start, end}` dicts inside each segment. The function `extract_word_timings()` in `highlight_scorer.py:186` already extracts words for a `[start_time, end_time]` window — this is the exact input needed for subtitle generation. + +3. **Transcript loading pattern** (`stages.py:2465–2485`): Stage loads transcript JSON from `source_video.transcript_path`, parses segments. Same pattern reusable in `stage_generate_shorts`. + +4. **Creator model** (`models.py:122`): Has `personality_profile` JSONB but no template/branding fields. No intro/outro configuration exists. + +5. **GeneratedShort model** (`models.py:836`): Has format_preset, minio_object_key, dimensions, status. No caption or template metadata columns. + +6. **Frontend**: `ShortPlayer.tsx` is a basic `