From dfc5aa2ae71b6737f7583e356041ad0ea6638384 Mon Sep 17 00:00:00 2001 From: jlightner Date: Sat, 4 Apr 2026 09:43:36 +0000 Subject: [PATCH] =?UTF-8?q?chore:=20Added=20GeneratedShort=20model=20with?= =?UTF-8?q?=20FormatPreset/ShortStatus=20enums,=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/models.py" - "backend/config.py" - "docker/Dockerfile.api" - "docker-compose.yml" - "alembic/versions/025_add_generated_shorts.py" GSD-Task: S03/T01 --- .gsd/DECISIONS.md | 1 + .gsd/milestones/M023/M023-ROADMAP.md | 2 +- .../milestones/M023/slices/S02/S02-SUMMARY.md | 88 ++++++++ .gsd/milestones/M023/slices/S02/S02-UAT.md | 64 ++++++ .../M023/slices/S02/tasks/T02-VERIFY.json | 24 +++ .gsd/milestones/M023/slices/S03/S03-PLAN.md | 192 +++++++++++++++++- .../M023/slices/S03/S03-RESEARCH.md | 118 +++++++++++ .../M023/slices/S03/tasks/T01-PLAN.md | 73 +++++++ .../M023/slices/S03/tasks/T01-SUMMARY.md | 85 ++++++++ .../M023/slices/S03/tasks/T02-PLAN.md | 102 ++++++++++ .../M023/slices/S03/tasks/T03-PLAN.md | 89 ++++++++ alembic/versions/025_add_generated_shorts.py | 45 ++++ backend/config.py | 1 + backend/models.py | 52 +++++ docker-compose.yml | 2 + docker/Dockerfile.api | 2 +- 16 files changed, 937 insertions(+), 3 deletions(-) create mode 100644 .gsd/milestones/M023/slices/S02/S02-SUMMARY.md create mode 100644 .gsd/milestones/M023/slices/S02/S02-UAT.md create mode 100644 .gsd/milestones/M023/slices/S02/tasks/T02-VERIFY.json create mode 100644 .gsd/milestones/M023/slices/S03/S03-RESEARCH.md create mode 100644 .gsd/milestones/M023/slices/S03/tasks/T01-PLAN.md create mode 100644 .gsd/milestones/M023/slices/S03/tasks/T01-SUMMARY.md create mode 100644 .gsd/milestones/M023/slices/S03/tasks/T02-PLAN.md create mode 100644 .gsd/milestones/M023/slices/S03/tasks/T03-PLAN.md create mode 100644 alembic/versions/025_add_generated_shorts.py diff --git a/.gsd/DECISIONS.md b/.gsd/DECISIONS.md index 0bf7f17..0de68b9 100644 --- 
a/.gsd/DECISIONS.md +++ b/.gsd/DECISIONS.md @@ -48,3 +48,4 @@ | D040 | M021/S02 | architecture | Creator-scoped retrieval cascade strategy | Sequential 4-tier cascade (creator → domain → global → none) with ll_keywords scoping and post-filtering | Sequential cascade is simpler than parallel-with-priority and avoids wasted LightRAG calls when early tiers succeed. ll_keywords hints LightRAG's retrieval without hard constraints. Post-filtering on tier 1 ensures strict creator scoping while 3x oversampling compensates for filtering losses. Domain tier uses ≥2 page threshold to avoid noise from sparse creators. | Yes | agent | | D041 | M022/S05 | architecture | Highlight scorer weight distribution for 10-dimension model | Original 7 dimensions reduced proportionally, new 3 audio proxy dimensions (speech_rate_variance, pause_density, speaking_pace) allocated 0.22 total weight. Audio dims default to 0.5 (neutral) when word_timings unavailable for backward compatibility. | Audio proxy signals derived from word-level timing data provide meaningful highlight quality indicators without requiring raw audio analysis (librosa). Neutral fallback ensures existing scoring paths are unaffected. | Yes | agent | | D042 | M023/S01 | architecture | Rich text editor for creator posts | Tiptap (headless, React) with StarterKit + Link + Placeholder extensions. Store Tiptap JSON as canonical format in JSONB column, render client-side via @tiptap/html. | Headless architecture fits dark theme customization. Large ecosystem, well-maintained. JSON storage is lossless and enables future server-side rendering. No HTML sanitization needed since canonical format is structured JSON. 
| Yes | agent | +| D043 | M023/S02 | architecture | Personality weight → system prompt modulation strategy | 3-tier intensity (<0.4 subtle reference, 0.4-0.8 adopt voice, ≥0.8 fully embody) with temperature scaling 0.3–0.5 linear on weight | Stepped intensity prevents jarring persona at low weights while allowing full creator voice at high values. Temperature stays in 0.3-0.5 range to keep responses factually grounded even at maximum personality — wider ranges risk hallucination in a knowledge-base context. | Yes | agent | diff --git a/.gsd/milestones/M023/M023-ROADMAP.md b/.gsd/milestones/M023/M023-ROADMAP.md index bcc03f7..b96c9e0 100644 --- a/.gsd/milestones/M023/M023-ROADMAP.md +++ b/.gsd/milestones/M023/M023-ROADMAP.md @@ -7,7 +7,7 @@ The demo MVP comes together. Chat widget wires to the intelligence layer (INT-1) | ID | Slice | Risk | Depends | Done | After this | |----|-------|------|---------|------|------------| | S01 | [A] Post Editor + File Sharing | high | — | ✅ | Creator writes rich text posts with file attachments (presets, sample packs). Followers see posts in feed. Files downloadable via signed URLs. | -| S02 | [A] Chat Widget ↔ Chat Engine Wiring (INT-1) | high | — | ⬜ | Chat widget on creator profile wired to chat engine. Personality slider adjusts response style. Citations link to sources. | +| S02 | [A] Chat Widget ↔ Chat Engine Wiring (INT-1) | high | — | ✅ | Chat widget on creator profile wired to chat engine. Personality slider adjusts response style. Citations link to sources. | | S03 | [B] Shorts Generation Pipeline v1 | medium | — | ⬜ | Shorts pipeline extracts clips from highlight boundaries in 3 format presets (vertical, square, horizontal) | | S04 | [B] Personality Slider (Full Interpolation) | medium | — | ⬜ | Personality slider at 0.0 gives encyclopedic response. At 1.0 gives creator-voiced response with their speech patterns. 
| | S05 | Forgejo KB Update — Demo Build Docs | low | S01, S02, S03, S04 | ⬜ | Forgejo wiki updated with post editor, MinIO, chat integration, shorts pipeline, personality system | diff --git a/.gsd/milestones/M023/slices/S02/S02-SUMMARY.md b/.gsd/milestones/M023/slices/S02/S02-SUMMARY.md new file mode 100644 index 0000000..4fc625e --- /dev/null +++ b/.gsd/milestones/M023/slices/S02/S02-SUMMARY.md @@ -0,0 +1,88 @@ +--- +id: S02 +parent: M023 +milestone: M023 +provides: + - personality_weight API contract (0.0–1.0 float in ChatRequest) + - ChatWidget slider UI component for personality control + - System prompt modulation based on Creator.personality_profile JSONB +requires: + [] +affects: + - S04 +key_files: + - backend/routers/chat.py + - backend/chat_service.py + - backend/tests/test_chat.py + - frontend/src/api/chat.ts + - frontend/src/components/ChatWidget.tsx + - frontend/src/components/ChatWidget.module.css +key_decisions: + - D043: 3-tier personality intensity (<0.4 subtle, 0.4-0.8 voice, ≥0.8 embody) with temperature 0.3-0.5 + - Slider placed below chat header as own row with border-bottom separator +patterns_established: + - personality_weight as a float param threaded through API → service → LLM call, available for other endpoints to reuse + - Graceful JSONB profile extraction with fallback: missing keys in nested dicts default to empty rather than crashing +observability_surfaces: + - chat_request log line includes weight= for tracking personality usage in production logs +drill_down_paths: + - .gsd/milestones/M023/slices/S02/tasks/T01-SUMMARY.md + - .gsd/milestones/M023/slices/S02/tasks/T02-SUMMARY.md +duration: "" +verification_result: passed +completed_at: 2026-04-04T09:32:28.770Z +blocker_discovered: false +--- + +# S02: [A] Chat Widget ↔ Chat Engine Wiring (INT-1) + +**Chat widget sends personality_weight (0.0–1.0) to chat engine; backend modulates system prompt with creator voice profile and scales LLM temperature; frontend slider wired 
end-to-end.** + +## What Happened + +Two tasks delivered the full personality-weight pipeline from UI slider to LLM prompt modulation. + +**T01 — Backend personality weight threading.** Added `personality_weight` (float, 0.0–1.0, Pydantic-validated) to `ChatRequest`. When weight > 0 and a creator is specified, `ChatService.stream_response()` queries `Creator.personality_profile` JSONB, extracts voice cues (signature phrases, tone descriptors, teaching style, energy, formality), and appends a personality injection block to the system prompt. Three intensity tiers: subtle reference (<0.4), voice adoption (0.4–0.8), full embodiment (≥0.8). Temperature scales linearly from 0.3 (encyclopedic) to 0.5 (full personality). Graceful fallback to encyclopedic prompt on missing creator, null profile, or DB error. 9 new tests covering all paths — weight forwarding, prompt injection content, null/missing fallback, validation boundaries. All 22 tests pass. + +**T02 — Frontend slider wiring.** Added `personalityWeight` optional parameter to `streamChat()` in `chat.ts`, sent as `personality_weight` in the POST body. `ChatWidget.tsx` gained a range input (0.0–1.0, step 0.1) in a styled row between header and messages. Labels show "Encyclopedic" at left and "Creator Voice" at right. Custom CSS for thumb/track matching the dark theme with cyan accent. Value flows through `sendMessage` → `streamChat` → API. Frontend builds clean (184 modules, 0 errors). + +## Verification + +Backend: `cd backend && python -m pytest tests/test_chat.py -v` — 22 passed (13 existing + 9 new), 0 failed. Frontend: `cd frontend && npm run build` — 184 modules, 0 errors, built in 2.88s. + +## Requirements Advanced + +None. + +## Requirements Validated + +None. + +## New Requirements Surfaced + +None. + +## Requirements Invalidated or Re-scoped + +None. + +## Deviations + +T01 simplified one test from wraps-based mock approach to capture-kwargs due to unbound method limitation. No functional impact. 
+ +## Known Limitations + +Personality profile must be pre-populated in Creator.personality_profile JSONB (from M022 extraction pipeline). Creators without profiles always get encyclopedic responses regardless of slider position. + +## Follow-ups + +S04 (Personality Slider Full Interpolation) will refine the prompt modulation with continuous interpolation rather than the current 3-tier step function. + +## Files Created/Modified + +- `backend/routers/chat.py` — Added personality_weight field to ChatRequest, forwarded to stream_response() +- `backend/chat_service.py` — Added personality_weight param, Creator profile query, system prompt modulation, temperature scaling +- `backend/tests/test_chat.py` — 9 new tests: weight forwarding, prompt injection, null fallback, validation boundaries +- `frontend/src/api/chat.ts` — Added personalityWeight param to streamChat(), sent as personality_weight in POST body +- `frontend/src/components/ChatWidget.tsx` — Added personality slider state + range input in header with Encyclopedic/Creator Voice labels +- `frontend/src/components/ChatWidget.module.css` — Slider row styles, custom range input styling matching dark theme diff --git a/.gsd/milestones/M023/slices/S02/S02-UAT.md b/.gsd/milestones/M023/slices/S02/S02-UAT.md new file mode 100644 index 0000000..923de00 --- /dev/null +++ b/.gsd/milestones/M023/slices/S02/S02-UAT.md @@ -0,0 +1,64 @@ +# S02: [A] Chat Widget ↔ Chat Engine Wiring (INT-1) — UAT + +**Milestone:** M023 +**Written:** 2026-04-04T09:32:28.770Z + +## UAT: Chat Widget ↔ Chat Engine Wiring (INT-1) + +### Preconditions +- Backend running with PostgreSQL containing at least one Creator with a populated `personality_profile` JSONB field +- Frontend built and served +- Chat widget accessible on a creator profile page + +### Test Cases + +#### TC1: Personality weight accepted by API +1. POST `/api/v1/chat` with `{"query": "test", "personality_weight": 0.5}` +2. **Expected:** 200 OK, SSE stream begins. 
No validation error. + +#### TC2: Personality weight validation — above range +1. POST `/api/v1/chat` with `{"query": "test", "personality_weight": 1.5}` +2. **Expected:** 422 Unprocessable Entity with validation error on personality_weight. + +#### TC3: Personality weight validation — below range +1. POST `/api/v1/chat` with `{"query": "test", "personality_weight": -0.1}` +2. **Expected:** 422 Unprocessable Entity with validation error on personality_weight. + +#### TC4: Personality weight validation — string type +1. POST `/api/v1/chat` with `{"query": "test", "personality_weight": "high"}` +2. **Expected:** 422 Unprocessable Entity. + +#### TC5: Default weight (0.0) — encyclopedic response +1. POST `/api/v1/chat` with `{"query": "what is sidechain compression", "creator": "COPYCATT"}` (no personality_weight) +2. **Expected:** Response uses encyclopedic tone. No creator voice cues in response. + +#### TC6: High weight with valid profile — creator voice +1. POST `/api/v1/chat` with `{"query": "what is sidechain compression", "creator": "COPYCATT", "personality_weight": 0.9}` +2. **Expected:** Response includes creator voice characteristics (signature phrases, teaching style from profile). Tone noticeably different from TC5. + +#### TC7: High weight with missing creator — graceful fallback +1. POST `/api/v1/chat` with `{"query": "test", "creator": "NonExistentCreator", "personality_weight": 1.0}` +2. **Expected:** 200 OK, encyclopedic response. No error. Check logs for DEBUG-level fallback message. + +#### TC8: High weight with null profile — graceful fallback +1. For a Creator with `personality_profile = NULL`, POST with `{"query": "test", "creator": "CreatorWithNullProfile", "personality_weight": 0.8}` +2. **Expected:** 200 OK, encyclopedic response. No crash, no error event. + +#### TC9: Slider renders in chat widget +1. Open a creator profile page in the browser +2. Open the chat widget +3. **Expected:** Slider visible in panel header area.
"Encyclopedic" label on left, "Creator Voice" on right. Default position at 0. + +#### TC10: Slider value flows to API +1. Move slider to ~0.7 +2. Send a chat message +3. **Expected:** Network tab shows POST to `/api/v1/chat` with `personality_weight: 0.7` in request body. + +#### TC11: Slider at extremes +1. Set slider to 0.0, send message. Set slider to 1.0, send message. +2. **Expected:** Both requests succeed. Weight 0.0 → encyclopedic tone. Weight 1.0 → maximum creator voice (if profile exists). + +### Edge Cases +- Creator with empty personality_profile (`{}`) → should fallback to encyclopedic (no crash on missing nested keys) +- Rapid slider changes between messages → each message sends the current slider value at time of send +- Chat widget on creator with no content in knowledge base → personality modulation still applies to system prompt even if retrieval returns nothing diff --git a/.gsd/milestones/M023/slices/S02/tasks/T02-VERIFY.json b/.gsd/milestones/M023/slices/S02/tasks/T02-VERIFY.json new file mode 100644 index 0000000..59f089d --- /dev/null +++ b/.gsd/milestones/M023/slices/S02/tasks/T02-VERIFY.json @@ -0,0 +1,24 @@ +{ + "schemaVersion": 1, + "taskId": "T02", + "unitId": "M023/S02/T02", + "timestamp": 1775295056282, + "passed": false, + "discoverySource": "task-plan", + "checks": [ + { + "command": "cd frontend", + "exitCode": 0, + "durationMs": 9, + "verdict": "pass" + }, + { + "command": "npm run build", + "exitCode": 254, + "durationMs": 109, + "verdict": "fail" + } + ], + "retryAttempt": 1, + "maxRetries": 2 +} diff --git a/.gsd/milestones/M023/slices/S03/S03-PLAN.md b/.gsd/milestones/M023/slices/S03/S03-PLAN.md index 50c2e97..47024d7 100644 --- a/.gsd/milestones/M023/slices/S03/S03-PLAN.md +++ b/.gsd/milestones/M023/slices/S03/S03-PLAN.md @@ -1,6 +1,196 @@ # S03: [B] Shorts Generation Pipeline v1 -**Goal:** Build ffmpeg-based shorts generation pipeline with format presets via Celery workers +**Goal:** Shorts generation pipeline extracts video 
clips from approved highlight candidates in 3 format presets (vertical 9:16, square 1:1, horizontal 16:9), stores them in MinIO, and exposes generate/list/download through API and frontend UI. **Demo:** After this: Shorts pipeline extracts clips from highlight boundaries in 3 format presets (vertical, square, horizontal) ## Tasks +- [x] **T01: Added GeneratedShort model with FormatPreset/ShortStatus enums, migration 025, video_source_path config, ffmpeg in Docker, and /videos volume mount on API and worker services** — ## Description + +Set up all infrastructure for the shorts pipeline: new SQLAlchemy model, Alembic migration, config setting for video source path, ffmpeg in Docker image, and volume mount for original video files. + +## Steps + +1. Add `FormatPreset` enum (vertical/square/horizontal) and `ShortStatus` enum (pending/processing/complete/failed) to `backend/models.py` +2. Add `GeneratedShort` model to `backend/models.py`: id (UUID pk), highlight_candidate_id (FK to highlight_candidates.id), format_preset (FormatPreset enum), minio_object_key (String, nullable), duration_secs (Float, nullable), width (Integer), height (Integer), file_size_bytes (BigInteger, nullable), status (ShortStatus, default pending), error_message (Text, nullable), created_at, updated_at. Add relationship back to HighlightCandidate. +3. Create Alembic migration `alembic/versions/025_add_generated_shorts.py` — create `generated_shorts` table with both new enums. Use pattern from existing migrations (e.g., 024). +4. Add `video_source_path: str = "/videos"` to `backend/config.py` Settings class. +5. Add `ffmpeg` to `apt-get install` line in `docker/Dockerfile.api`. +6. Add volume mount `- /vmPool/r/services/chrysopedia_videos:/videos:ro` to both chrysopedia-worker and chrysopedia-api services in `docker-compose.yml`. +7. Verify model imports and migration runs. 
+ +## Must-Haves + +- [ ] FormatPreset and ShortStatus enums defined +- [ ] GeneratedShort model with all columns and FK to highlight_candidates +- [ ] Migration 025 creates table and enums +- [ ] video_source_path config setting with /videos default +- [ ] ffmpeg in Dockerfile.api apt-get +- [ ] Volume mount in docker-compose.yml for worker + +## Verification + +- `cd backend && python -c "from models import GeneratedShort, FormatPreset, ShortStatus; print('OK')"` — model imports +- `grep ffmpeg docker/Dockerfile.api` — ffmpeg in Dockerfile +- `grep video_source_path backend/config.py` — config present +- `grep chrysopedia_videos docker-compose.yml` — volume mount present + +## Inputs + +- `backend/models.py` — existing HighlightCandidate model to add FK relationship +- `backend/config.py` — existing Settings class to extend +- `docker/Dockerfile.api` — existing Dockerfile to add ffmpeg +- `docker-compose.yml` — existing compose file to add volume mount +- `alembic/versions/024_add_posts_and_attachments.py` — pattern reference for migration + +## Expected Output + +- `backend/models.py` — updated with FormatPreset, ShortStatus, GeneratedShort +- `backend/config.py` — updated with video_source_path +- `docker/Dockerfile.api` — updated with ffmpeg +- `docker-compose.yml` — updated with video volume mount +- `alembic/versions/025_add_generated_shorts.py` — new migration file + - Estimate: 30m + - Files: backend/models.py, backend/config.py, docker/Dockerfile.api, docker-compose.yml, alembic/versions/025_add_generated_shorts.py + - Verify: cd backend && python -c "from models import GeneratedShort, FormatPreset, ShortStatus; print('OK')" && grep -q ffmpeg ../docker/Dockerfile.api && grep -q video_source_path config.py +- [ ] **T02: Build ffmpeg clip generator module and Celery task with MinIO upload** — ## Description + +Create the pure ffmpeg wrapper module with 3 format presets, then wire a Celery task that reads an approved highlight, resolves the video file path, 
generates clips for each preset, uploads to MinIO, and updates DB status. + +## Failure Modes + +| Dependency | On error | On timeout | On malformed response | +|------------|----------|-----------|----------------------| +| ffmpeg subprocess | Set ShortStatus.failed + capture stderr in error_message | subprocess timeout (300s) → same as error | N/A (binary output) | +| MinIO upload | Set ShortStatus.failed + log error | Retry once, then fail preset | N/A | +| Video file on disk | Set all presets to failed + log missing path | N/A | N/A | + +## Load Profile + +- **Shared resources**: Celery worker (concurrency=1), /tmp disk for intermediate files +- **Per-operation cost**: 1 DB read + 3 ffmpeg encodes + 3 MinIO uploads + 4 DB writes +- **10x breakpoint**: Worker queue backlog — single concurrency means jobs queue. Acceptable for v1. + +## Negative Tests + +- **Malformed inputs**: Missing video file → all presets fail with descriptive error +- **Error paths**: ffmpeg returns non-zero → preset marked failed, others still attempted +- **Boundary conditions**: Highlight with 0-second duration, highlight already processing (reject) + +## Steps + +1. Create `backend/pipeline/shorts_generator.py`: + - Define `PRESETS` dict mapping FormatPreset → (width, height, ffmpeg_vf_filter): + - vertical: 1080×1920, `scale=1080:-2,pad=1080:1920:(ow-iw)/2:(oh-ih)/2:black` + - square: 1080×1080, `crop=min(iw\,ih):min(iw\,ih),scale=1080:1080` + - horizontal: 1920×1080, `scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2:black` + - `extract_clip(input_path, output_path, start, end, vf_filter)` → runs ffmpeg via subprocess.run with timeout=300s, raises on non-zero return code + - `resolve_video_path(video_source_path, file_path)` → joins and validates file exists +2. 
Add `stage_generate_shorts` Celery task to `backend/pipeline/stages.py`: + - Takes highlight_candidate_id as argument + - Uses sync SQLAlchemy (existing `_SessionLocal` pattern) + - Loads HighlightCandidate with joined SourceVideo and KeyMoment + - Validates status is 'approved' and no existing 'processing' shorts + - Computes effective start/end (trim overrides key moment times) + - Resolves absolute video path via config.video_source_path + source_video.file_path + - For each FormatPreset: create GeneratedShort row (status=processing), run extract_clip to /tmp, upload to MinIO as `shorts/{highlight_id}/{preset_name}.mp4`, update row (status=complete, file_size, minio_object_key), clean up /tmp file. On error: set status=failed, store error_message, continue to next preset. + - Log progress per-preset. +3. Verify: module imports, task is registered in Celery. + +## Must-Haves + +- [ ] shorts_generator.py with 3 presets and extract_clip function +- [ ] resolve_video_path validates file exists before ffmpeg +- [ ] Celery task creates GeneratedShort rows and updates status per-preset +- [ ] Each preset independent — one failure doesn't block others +- [ ] Clips uploaded to MinIO under shorts/{highlight_id}/{preset}.mp4 +- [ ] Temp files cleaned up after upload + +## Verification + +- `cd backend && python -c "from pipeline.shorts_generator import extract_clip, PRESETS, resolve_video_path; print('generator OK')"` — module imports +- `cd backend && python -c "from pipeline.stages import stage_generate_shorts; print('task OK')"` — task imports +- `grep -q 'stage_generate_shorts' backend/pipeline/stages.py` — task registered + +## Observability Impact + +- Signals added: Per-preset structured log lines in Celery task (highlight_id, preset, status, duration_ms, file_size or error) +- How a future agent inspects: Query `generated_shorts` table WHERE highlight_candidate_id = X, check status and error_message columns +- Failure state exposed: error_message column stores 
ffmpeg stderr on failure; task-level exception logged to Celery worker output + +## Inputs + +- `backend/models.py` — GeneratedShort, FormatPreset, ShortStatus from T01 +- `backend/config.py` — video_source_path setting from T01 +- `backend/pipeline/stages.py` — existing Celery task patterns +- `backend/minio_client.py` — upload_file function +- `backend/worker.py` — Celery app instance + +## Expected Output + +- `backend/pipeline/shorts_generator.py` — new ffmpeg wrapper module +- `backend/pipeline/stages.py` — updated with stage_generate_shorts task + - Estimate: 45m + - Files: backend/pipeline/shorts_generator.py, backend/pipeline/stages.py, backend/minio_client.py + - Verify: cd backend && python -c "from pipeline.shorts_generator import extract_clip, PRESETS, resolve_video_path; print('OK')" && python -c "from pipeline.stages import stage_generate_shorts; print('OK')" +- [ ] **T03: Add shorts API endpoints and frontend generate button with status display** — ## Description + +Expose the shorts pipeline through API endpoints (trigger generation, list shorts, download link) and add a "Generate Shorts" button to the HighlightQueue UI for approved highlights with status badges and download links. + +## Steps + +1. Create `backend/routers/shorts.py`: + - `POST /admin/shorts/generate/{highlight_id}` — validate highlight exists and is approved, check no shorts already processing, dispatch `stage_generate_shorts.delay(highlight_id)`, return 202 with status. Use async SQLAlchemy session (FastAPI pattern from other routers). + - `GET /admin/shorts/{highlight_id}` — list all GeneratedShort rows for a highlight with status, format_preset, file_size_bytes, created_at. + - `GET /admin/shorts/download/{short_id}` — look up GeneratedShort by id, validate status is complete, return presigned MinIO URL via `minio_client.generate_download_url()`. 
+ - Response schemas: `GeneratedShortResponse` (id, highlight_candidate_id, format_preset, status, error_message, file_size_bytes, duration_secs, width, height, created_at), `ShortsListResponse` (shorts: list[GeneratedShortResponse]), `GenerateResponse` (status, message). +2. Register router in `backend/main.py`: `app.include_router(shorts.router, prefix="/api/v1")` +3. Create `frontend/src/api/shorts.ts`: + - `generateShorts(highlightId: string)` → POST + - `fetchShorts(highlightId: string)` → GET list + - `getShortDownloadUrl(shortId: string)` → GET download URL + - Types: `GeneratedShort`, `ShortsListResponse` +4. Update `frontend/src/pages/HighlightQueue.tsx`: + - Add "Generate Shorts" button on approved highlights (only when no shorts exist or all failed) + - Show generation status badges per-preset (pending/processing/complete/failed) + - Show download links for completed shorts + - Poll for status updates while any shorts are processing (5s interval) + - Add shorts state to component (Map) +5. Update `frontend/src/pages/HighlightQueue.module.css` with styles for generate button, status badges, download links. 
+ +## Must-Haves + +- [ ] Three API endpoints: generate trigger, list, download +- [ ] Router registered in main.py +- [ ] Frontend API client with types +- [ ] Generate button visible only on approved highlights +- [ ] Status badges show per-preset state +- [ ] Download links for completed shorts +- [ ] Polling while shorts are processing + +## Verification + +- `grep -q 'shorts' backend/main.py` — router registered +- `cd backend && python -c "from routers.shorts import router; print('router OK')"` — router imports +- `cd frontend && npx tsc --noEmit` — TypeScript compiles +- `cd frontend && npm run build` — frontend builds + +## Inputs + +- `backend/models.py` — GeneratedShort, FormatPreset, ShortStatus from T01 +- `backend/pipeline/stages.py` — stage_generate_shorts task from T02 +- `backend/minio_client.py` — generate_download_url function +- `backend/main.py` — existing router registration pattern +- `frontend/src/pages/HighlightQueue.tsx` — existing highlight queue UI +- `frontend/src/pages/HighlightQueue.module.css` — existing styles +- `frontend/src/api/highlights.ts` — existing API client pattern + +## Expected Output + +- `backend/routers/shorts.py` — new router file +- `backend/main.py` — updated with shorts router +- `frontend/src/api/shorts.ts` — new API client +- `frontend/src/pages/HighlightQueue.tsx` — updated with generate button and status +- `frontend/src/pages/HighlightQueue.module.css` — updated with new styles + - Estimate: 45m + - Files: backend/routers/shorts.py, backend/main.py, frontend/src/api/shorts.ts, frontend/src/pages/HighlightQueue.tsx, frontend/src/pages/HighlightQueue.module.css + - Verify: grep -q 'shorts' backend/main.py && cd backend && python -c "from routers.shorts import router; print('OK')" && cd ../frontend && npx tsc --noEmit diff --git a/.gsd/milestones/M023/slices/S03/S03-RESEARCH.md b/.gsd/milestones/M023/slices/S03/S03-RESEARCH.md new file mode 100644 index 0000000..db6cdee --- /dev/null +++ 
b/.gsd/milestones/M023/slices/S03/S03-RESEARCH.md @@ -0,0 +1,118 @@ +# S03 Research — Shorts Generation Pipeline v1 + +## Summary + +Build a pipeline that takes approved highlight candidates (with trim boundaries) and generates video clips in 3 format presets (vertical 9:16, square 1:1, horizontal 16:9). The data model for highlights already exists with scoring, triage, and trim boundaries. The main work is: (1) adding ffmpeg to the Docker image, (2) a Celery task for clip extraction, (3) a DB model/migration for generated shorts, (4) MinIO storage for output clips, (5) API endpoints to trigger generation and retrieve results, and (6) a "Generate Shorts" button in the existing HighlightQueue UI. + +**Critical constraint:** Original video files are NOT on the server. Source videos were transcribed on a GPU desktop and only transcripts were ingested. The `file_path` column on `SourceVideo` (e.g., `Chee/12 Day 5 - Music Business - Part 1.mp4`) is a relative path with no corresponding file in `/data/`. The pipeline needs a video source path — either mounting the original video directory or uploading videos to MinIO first. + +## Recommendation + +**Targeted research** — known technology (ffmpeg, Celery), moderate integration complexity (video file access, Docker image change, new model + migration). + +Approach: Add ffmpeg to the Docker image. Create a new `GeneratedShort` model tracking each rendered clip. Add a Celery task `stage_generate_short` that reads an approved highlight, extracts the clip with ffmpeg, re-encodes to each format preset, and uploads to MinIO. Expose API endpoints for triggering generation per-highlight and listing/downloading generated shorts. Add a "Generate" button in the HighlightQueue UI for approved highlights. + +For the video file access problem: add a configurable `video_source_path` setting (default `/videos`) and a new Docker Compose volume mount pointing to where the original videos live on ub01's filesystem. 
This is the simplest approach — the files exist somewhere on the host, we just need to mount them. + +## Implementation Landscape + +### Existing Code (what's already built) + +| File | Role | Key Details | +|------|------|-------------| +| `backend/models.py` | `HighlightCandidate` model | `id`, `key_moment_id`, `source_video_id`, `score`, `status` (candidate/approved/rejected), `trim_start`, `trim_end`, `duration_secs`. `HighlightStatus` enum. | +| `backend/models.py` | `KeyMoment` model | `start_time`, `end_time` (floats, seconds). Linked to `SourceVideo`. | +| `backend/models.py` | `SourceVideo` model | `file_path` (relative, e.g. `Chee/video.mp4`), `filename`, `creator_id`, `duration_seconds`. | +| `backend/routers/creator_highlights.py` | Creator highlight endpoints | List, detail, status update (approve/reject), trim update. Auth-guarded. `shorts_only` filter (≤60s). | +| `backend/routers/highlights.py` | Admin highlight endpoints | Trigger detection, list candidates. No auth. | +| `backend/pipeline/stages.py` | Celery tasks | Pattern: `@celery_app.task(bind=True, ...)`. Uses sync SQLAlchemy (`_engine`, `_SessionLocal` module globals). | +| `backend/pipeline/highlight_scorer.py` | Pure scoring function | `score_moment()` — pure function, no DB. | +| `backend/pipeline/highlight_schemas.py` | Pydantic schemas | `HighlightCandidateResponse`, `HighlightScoreBreakdown`, `HighlightBatchResult`. | +| `backend/minio_client.py` | MinIO singleton | `upload_file()`, `generate_download_url()`, `delete_file()`. Bucket auto-creation. | +| `backend/config.py` | Settings | `minio_url`, `minio_access_key`, `minio_secret_key`, `minio_bucket`. `video_metadata_path: str = "/data/video_meta"`. | +| `backend/worker.py` | Celery app instance | Standard Celery setup, imported by stages.py. | +| `docker/Dockerfile.api` | API/worker image | `python:3.12-slim`. System deps: `gcc libpq-dev curl`. **No ffmpeg.** | +| `docker-compose.yml` | Stack | Worker uses same image as API. 
Volumes: `/vmPool/r/services/chrysopedia_data:/data`. MinIO at `chrysopedia-minio:9000`. | +| `frontend/src/pages/HighlightQueue.tsx` | Highlight triage UI | Lists highlights, approve/reject/trim. Has "Shorts" filter tab. | +| `frontend/src/api/highlights.ts` | Highlight API client | `fetchCreatorHighlights()`, `updateHighlightStatus()`, `trimHighlight()`. | + +### What Needs to Be Built + +| Component | Description | +|-----------|-------------| +| **ffmpeg in Docker image** | Add `ffmpeg` to `apt-get install` in `docker/Dockerfile.api`. Single line change. | +| **Video source volume mount** | Add volume mount to docker-compose.yml for worker container pointing to original video files. Add `video_source_path` config setting. | +| **`GeneratedShort` model** | New SQLAlchemy model: `id`, `highlight_candidate_id` (FK), `format_preset` (enum: vertical/square/horizontal), `minio_object_key`, `duration_secs`, `width`, `height`, `file_size_bytes`, `status` (enum: pending/processing/complete/failed), `error_message`, `created_at`. | +| **Alembic migration** | `025_add_generated_shorts.py` — create `generated_shorts` table with `FormatPreset` and `ShortStatus` enums. | +| **`shorts_generator.py`** | Pure ffmpeg wrapper module: `generate_clip(input_path, output_path, start, end, width, height)`. Uses `subprocess.run(["ffmpeg", ...])`. Three preset functions: `vertical_preset()` → 1080×1920, `square_preset()` → 1080×1080, `horizontal_preset()` → 1920×1080. Each applies crop/pad/scale filters. | +| **Celery task `stage_generate_shorts`** | New task in `stages.py`. For an approved highlight: resolve video file path, compute effective start/end (trim overrides key moment times), call `shorts_generator` for each preset, upload results to MinIO under `shorts/{highlight_id}/{preset}.mp4`, update `GeneratedShort` rows. | +| **API endpoints** | `POST /admin/shorts/generate/{highlight_id}` — trigger generation. 
`GET /admin/shorts/{highlight_id}` — list generated shorts for a highlight. `GET /admin/shorts/{short_id}/download` — presigned URL. | +| **Frontend "Generate" button** | Add to HighlightQueue: "Generate Shorts" button on approved highlights. Show generation status and download links when complete. | + +### Format Presets (ffmpeg filter chains) + +| Preset | Output | ffmpeg Strategy | +|--------|--------|-----------------| +| **Vertical** (9:16) | 1080×1920 | Scale to fit width 1080, then pad vertically to 1920 (letterbox with black) or crop if source is wider. `scale=1080:-2,pad=1080:1920:(ow-iw)/2:(oh-ih)/2:black` | +| **Square** (1:1) | 1080×1080 | Center-crop to square. `crop=min(iw\,ih):min(iw\,ih),scale=1080:1080` | +| **Horizontal** (16:9) | 1920×1080 | Scale to fit, pad if needed. Most source content is already 16:9 so this is usually just a re-encode with trim. `scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2:black` | + +### Clip Extraction ffmpeg Command Pattern + +``` +ffmpeg -ss {start} -to {end} -i {input} \ + -vf "{scale_filter}" \ + -c:v libx264 -preset medium -crf 23 \ + -c:a aac -b:a 128k \ + -movflags +faststart \ + -y {output} +``` + +Key: `-ss` before `-i` for fast seek. `-movflags +faststart` for web playback. CRF 23 is good quality/size balance. `-preset medium` balances speed vs compression. + +### Effective Time Boundaries + +The trim logic: if `trim_start`/`trim_end` are set on the HighlightCandidate, use those. Otherwise fall back to the KeyMoment's `start_time`/`end_time`. The creator_highlights router already exposes this — the Celery task mirrors the logic. + +### MinIO Storage Layout + +``` +shorts/ + {highlight_id}/ + vertical.mp4 + square.mp4 + horizontal.mp4 +``` + +### Risks and Constraints + +1. **Video file access** — Original videos are NOT on the server. Need to locate them on ub01's filesystem and add a volume mount. The `file_path` column has relative paths like `Chee/video.mp4`. 
A config setting `video_source_path` resolves these to absolute paths: `{video_source_path}/{file_path}`. + +2. **ffmpeg not in Docker image** — Need to add to `apt-get install` in Dockerfile.api. Worker shares this image. This is a one-line change but requires a full image rebuild. + +3. **Worker concurrency** — Worker runs with `--concurrency=1`. Video encoding is CPU-heavy. Long-running ffmpeg jobs will block other Celery tasks. Acceptable for v1 (single-admin tool), but worth noting. + +4. **Disk space** — Temporary files during encoding. Use `/tmp` in the container, clean up after MinIO upload. Worker container doesn't have persistent temp storage, which is fine — clips are small (≤60s segments). + +5. **Error handling** — ffmpeg can fail for many reasons (corrupt input, unsupported codec, disk full). Each format preset should be independent — if vertical fails, still attempt square and horizontal. Status tracked per-short. + +### Natural Task Decomposition + +1. **T01: Model + Migration + Config** — Add `GeneratedShort` model, `FormatPreset` enum, `ShortStatus` enum, migration 025, `video_source_path` config setting. Pure DB/config work. Verify: migration runs, model imports. + +2. **T02: ffmpeg Module + Docker** — Create `backend/pipeline/shorts_generator.py` with clip extraction functions. Add ffmpeg to Dockerfile.api. Verify: unit test with a synthetic test (mock subprocess) + manual ffmpeg command test. + +3. **T03: Celery Task + MinIO Integration** — Add `stage_generate_shorts` task to stages.py. Wire up: read highlight → resolve video path → call generator for each preset → upload to MinIO → update DB status. Verify: task dispatches and completes (needs running stack). + +4. **T04: API Endpoints** — Add `backend/routers/shorts.py` with generate trigger, list, and download endpoints. Register in main.py. Verify: curl tests against endpoints. + +5. **T05: Frontend Generate Button + Status** — Add generate button to HighlightQueue for approved highlights. 
Show status badges and download links. Add API client functions. Verify: visual in browser. + +### Don't Hand-Roll + +- **ffmpeg wrapper**: Use `subprocess.run` directly, not `ffmpeg-python` or `moviepy`. The command is simple enough that a wrapper library adds dependency weight without value. The codebase already uses subprocess patterns (whisper script). +- **Video format detection**: Don't try to detect input format/resolution and compute optimal filters dynamically in v1. Use fixed output presets with ffmpeg's built-in scaling/padding. If source is 16:9 and output is 16:9, ffmpeg's `force_original_aspect_ratio=decrease` + `pad` handles it correctly regardless. + +### Skill Suggestions + +- `digitalsamba/claude-code-video-toolkit@ffmpeg` (1.9K installs) — ffmpeg patterns for video processing. Potentially useful for the ffmpeg filter chain authoring. Install: `npx skills add digitalsamba/claude-code-video-toolkit@ffmpeg` diff --git a/.gsd/milestones/M023/slices/S03/tasks/T01-PLAN.md b/.gsd/milestones/M023/slices/S03/tasks/T01-PLAN.md new file mode 100644 index 0000000..438af4e --- /dev/null +++ b/.gsd/milestones/M023/slices/S03/tasks/T01-PLAN.md @@ -0,0 +1,73 @@ +--- +estimated_steps: 34 +estimated_files: 5 +skills_used: [] +--- + +# T01: Add GeneratedShort model, migration, config, and Docker infrastructure + +## Description + +Set up all infrastructure for the shorts pipeline: new SQLAlchemy model, Alembic migration, config setting for video source path, ffmpeg in Docker image, and volume mount for original video files. + +## Steps + +1. Add `FormatPreset` enum (vertical/square/horizontal) and `ShortStatus` enum (pending/processing/complete/failed) to `backend/models.py` +2. 
Add `GeneratedShort` model to `backend/models.py`: id (UUID pk), highlight_candidate_id (FK to highlight_candidates.id), format_preset (FormatPreset enum), minio_object_key (String, nullable), duration_secs (Float, nullable), width (Integer), height (Integer), file_size_bytes (BigInteger, nullable), status (ShortStatus, default pending), error_message (Text, nullable), created_at, updated_at. Add relationship back to HighlightCandidate. +3. Create Alembic migration `alembic/versions/025_add_generated_shorts.py` — create `generated_shorts` table with both new enums. Use pattern from existing migrations (e.g., 024). +4. Add `video_source_path: str = "/videos"` to `backend/config.py` Settings class. +5. Add `ffmpeg` to `apt-get install` line in `docker/Dockerfile.api`. +6. Add volume mount `- /vmPool/r/services/chrysopedia_videos:/videos:ro` to both chrysopedia-worker and chrysopedia-api services in `docker-compose.yml`. +7. Verify model imports and migration runs. + +## Must-Haves + +- [ ] FormatPreset and ShortStatus enums defined +- [ ] GeneratedShort model with all columns and FK to highlight_candidates +- [ ] Migration 025 creates table and enums +- [ ] video_source_path config setting with /videos default +- [ ] ffmpeg in Dockerfile.api apt-get +- [ ] Volume mount in docker-compose.yml for worker + +## Verification + +- `cd backend && python -c "from models import GeneratedShort, FormatPreset, ShortStatus; print('OK')"` — model imports +- `grep ffmpeg docker/Dockerfile.api` — ffmpeg in Dockerfile +- `grep video_source_path backend/config.py` — config present +- `grep chrysopedia_videos docker-compose.yml` — volume mount present + +## Inputs + +- `backend/models.py` — existing HighlightCandidate model to add FK relationship +- `backend/config.py` — existing Settings class to extend +- `docker/Dockerfile.api` — existing Dockerfile to add ffmpeg +- `docker-compose.yml` — existing compose file to add volume mount +- 
`alembic/versions/024_add_posts_and_attachments.py` — pattern reference for migration + +## Expected Output + +- `backend/models.py` — updated with FormatPreset, ShortStatus, GeneratedShort +- `backend/config.py` — updated with video_source_path +- `docker/Dockerfile.api` — updated with ffmpeg +- `docker-compose.yml` — updated with video volume mount +- `alembic/versions/025_add_generated_shorts.py` — new migration file + +## Inputs + +- `backend/models.py` +- `backend/config.py` +- `docker/Dockerfile.api` +- `docker-compose.yml` +- `alembic/versions/024_add_posts_and_attachments.py` + +## Expected Output + +- `backend/models.py` +- `backend/config.py` +- `docker/Dockerfile.api` +- `docker-compose.yml` +- `alembic/versions/025_add_generated_shorts.py` + +## Verification + +cd backend && python -c "from models import GeneratedShort, FormatPreset, ShortStatus; print('OK')" && grep -q ffmpeg ../docker/Dockerfile.api && grep -q video_source_path config.py diff --git a/.gsd/milestones/M023/slices/S03/tasks/T01-SUMMARY.md b/.gsd/milestones/M023/slices/S03/tasks/T01-SUMMARY.md new file mode 100644 index 0000000..98f52f6 --- /dev/null +++ b/.gsd/milestones/M023/slices/S03/tasks/T01-SUMMARY.md @@ -0,0 +1,85 @@ +--- +id: T01 +parent: S03 +milestone: M023 +provides: [] +requires: [] +affects: [] +key_files: ["backend/models.py", "backend/config.py", "docker/Dockerfile.api", "docker-compose.yml", "alembic/versions/025_add_generated_shorts.py"] +key_decisions: ["Used explicit enum creation in migration for clean up/down lifecycle"] +patterns_established: [] +drill_down_paths: [] +observability_surfaces: [] +duration: "" +verification_result: "All four slice verification checks pass: model imports OK, ffmpeg in Dockerfile confirmed, video_source_path in config confirmed, chrysopedia_videos volume mount in docker-compose.yml confirmed for both services." 
+completed_at: 2026-04-04T09:43:32.234Z +blocker_discovered: false +--- + +# T01: Added GeneratedShort model with FormatPreset/ShortStatus enums, migration 025, video_source_path config, ffmpeg in Docker, and /videos volume mount on API and worker services + +> Added GeneratedShort model with FormatPreset/ShortStatus enums, migration 025, video_source_path config, ffmpeg in Docker, and /videos volume mount on API and worker services + +## What Happened + +Added FormatPreset (vertical/square/horizontal) and ShortStatus (pending/processing/complete/failed) enums plus GeneratedShort model to models.py with FK to highlight_candidates. Created Alembic migration 025 with explicit enum creation. Added video_source_path config, ffmpeg to Dockerfile, and /videos:ro volume mount to both API and worker services. + +## Verification + +All four slice verification checks pass: model imports OK, ffmpeg in Dockerfile confirmed, video_source_path in config confirmed, chrysopedia_videos volume mount in docker-compose.yml confirmed for both services. 
+ +## Verification Evidence + +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| 1 | `cd backend && python -c "from models import GeneratedShort, FormatPreset, ShortStatus; print('OK')"` | 0 | ✅ pass | 1000ms | +| 2 | `grep ffmpeg docker/Dockerfile.api` | 0 | ✅ pass | 50ms | +| 3 | `grep video_source_path backend/config.py` | 0 | ✅ pass | 50ms | +| 4 | `grep chrysopedia_videos docker-compose.yml` | 0 | ✅ pass | 50ms | + + +## Deviations + +None. + +## Known Issues + +None. + +## Files Created/Modified + +- `backend/models.py` +- `backend/config.py` +- `docker/Dockerfile.api` +- `docker-compose.yml` +- `alembic/versions/025_add_generated_shorts.py` diff --git a/.gsd/milestones/M023/slices/S03/tasks/T02-PLAN.md b/.gsd/milestones/M023/slices/S03/tasks/T02-PLAN.md new file mode 100644 index 0000000..809a338 --- /dev/null +++ b/.gsd/milestones/M023/slices/S03/tasks/T02-PLAN.md @@ -0,0 +1,102 @@ +--- +estimated_steps: 58 +estimated_files: 3 +skills_used: [] +--- + +# T02: Build ffmpeg clip generator module and Celery task with MinIO upload + +## Description + +Create the pure ffmpeg wrapper module with 3 format presets, then wire a Celery task that reads an approved highlight, resolves the video file path, generates clips for each preset, uploads to MinIO, and updates DB status. 
+ +## Failure Modes + +| Dependency | On error | On timeout | On malformed response | +|------------|----------|-----------|----------------------| +| ffmpeg subprocess | Set ShortStatus.failed + capture stderr in error_message | subprocess timeout (300s) → same as error | N/A (binary output) | +| MinIO upload | Set ShortStatus.failed + log error | Retry once, then fail preset | N/A | +| Video file on disk | Set all presets to failed + log missing path | N/A | N/A | + +## Load Profile + +- **Shared resources**: Celery worker (concurrency=1), /tmp disk for intermediate files +- **Per-operation cost**: 1 DB read + 3 ffmpeg encodes + 3 MinIO uploads + 4 DB writes +- **10x breakpoint**: Worker queue backlog — single concurrency means jobs queue. Acceptable for v1. + +## Negative Tests + +- **Malformed inputs**: Missing video file → all presets fail with descriptive error +- **Error paths**: ffmpeg returns non-zero → preset marked failed, others still attempted +- **Boundary conditions**: Highlight with 0-second duration, highlight already processing (reject) + +## Steps + +1. Create `backend/pipeline/shorts_generator.py`: + - Define `PRESETS` dict mapping FormatPreset → (width, height, ffmpeg_vf_filter): + - vertical: 1080×1920, `scale=1080:-2,pad=1080:1920:(ow-iw)/2:(oh-ih)/2:black` + - square: 1080×1080, `crop=min(iw\,ih):min(iw\,ih),scale=1080:1080` + - horizontal: 1920×1080, `scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2:black` + - `extract_clip(input_path, output_path, start, end, vf_filter)` → runs ffmpeg via subprocess.run with timeout=300s, raises on non-zero return code + - `resolve_video_path(video_source_path, file_path)` → joins and validates file exists +2. 
Add `stage_generate_shorts` Celery task to `backend/pipeline/stages.py`: + - Takes highlight_candidate_id as argument + - Uses sync SQLAlchemy (existing `_SessionLocal` pattern) + - Loads HighlightCandidate with joined SourceVideo and KeyMoment + - Validates status is 'approved' and no existing 'processing' shorts + - Computes effective start/end (trim overrides key moment times) + - Resolves absolute video path via config.video_source_path + source_video.file_path + - For each FormatPreset: create GeneratedShort row (status=processing), run extract_clip to /tmp, upload to MinIO as `shorts/{highlight_id}/{preset_name}.mp4`, update row (status=complete, file_size, minio_object_key), clean up /tmp file. On error: set status=failed, store error_message, continue to next preset. + - Log progress per-preset. +3. Verify: module imports, task is registered in Celery. + +## Must-Haves + +- [ ] shorts_generator.py with 3 presets and extract_clip function +- [ ] resolve_video_path validates file exists before ffmpeg +- [ ] Celery task creates GeneratedShort rows and updates status per-preset +- [ ] Each preset independent — one failure doesn't block others +- [ ] Clips uploaded to MinIO under shorts/{highlight_id}/{preset}.mp4 +- [ ] Temp files cleaned up after upload + +## Verification + +- `cd backend && python -c "from pipeline.shorts_generator import extract_clip, PRESETS, resolve_video_path; print('generator OK')"` — module imports +- `cd backend && python -c "from pipeline.stages import stage_generate_shorts; print('task OK')"` — task imports +- `grep -q 'stage_generate_shorts' backend/pipeline/stages.py` — task registered + +## Observability Impact + +- Signals added: Per-preset structured log lines in Celery task (highlight_id, preset, status, duration_ms, file_size or error) +- How a future agent inspects: Query `generated_shorts` table WHERE highlight_candidate_id = X, check status and error_message columns +- Failure state exposed: error_message column stores 
ffmpeg stderr on failure; task-level exception logged to Celery worker output + +## Inputs + +- `backend/models.py` — GeneratedShort, FormatPreset, ShortStatus from T01 +- `backend/config.py` — video_source_path setting from T01 +- `backend/pipeline/stages.py` — existing Celery task patterns +- `backend/minio_client.py` — upload_file function +- `backend/worker.py` — Celery app instance + +## Expected Output + +- `backend/pipeline/shorts_generator.py` — new ffmpeg wrapper module +- `backend/pipeline/stages.py` — updated with stage_generate_shorts task + +## Inputs + +- `backend/models.py` +- `backend/config.py` +- `backend/pipeline/stages.py` +- `backend/minio_client.py` +- `backend/worker.py` + +## Expected Output + +- `backend/pipeline/shorts_generator.py` +- `backend/pipeline/stages.py` + +## Verification + +cd backend && python -c "from pipeline.shorts_generator import extract_clip, PRESETS, resolve_video_path; print('OK')" && python -c "from pipeline.stages import stage_generate_shorts; print('OK')" diff --git a/.gsd/milestones/M023/slices/S03/tasks/T03-PLAN.md b/.gsd/milestones/M023/slices/S03/tasks/T03-PLAN.md new file mode 100644 index 0000000..834424c --- /dev/null +++ b/.gsd/milestones/M023/slices/S03/tasks/T03-PLAN.md @@ -0,0 +1,89 @@ +--- +estimated_steps: 48 +estimated_files: 5 +skills_used: [] +--- + +# T03: Add shorts API endpoints and frontend generate button with status display + +## Description + +Expose the shorts pipeline through API endpoints (trigger generation, list shorts, download link) and add a "Generate Shorts" button to the HighlightQueue UI for approved highlights with status badges and download links. + +## Steps + +1. Create `backend/routers/shorts.py`: + - `POST /admin/shorts/generate/{highlight_id}` — validate highlight exists and is approved, check no shorts already processing, dispatch `stage_generate_shorts.delay(highlight_id)`, return 202 with status. Use async SQLAlchemy session (FastAPI pattern from other routers). 
+ - `GET /admin/shorts/{highlight_id}` — list all GeneratedShort rows for a highlight with status, format_preset, file_size_bytes, created_at. + - `GET /admin/shorts/download/{short_id}` — look up GeneratedShort by id, validate status is complete, return presigned MinIO URL via `minio_client.generate_download_url()`. + - Response schemas: `GeneratedShortResponse` (id, highlight_candidate_id, format_preset, status, error_message, file_size_bytes, duration_secs, width, height, created_at), `ShortsListResponse` (shorts: list[GeneratedShortResponse]), `GenerateResponse` (status, message). +2. Register router in `backend/main.py`: `app.include_router(shorts.router, prefix="/api/v1")` +3. Create `frontend/src/api/shorts.ts`: + - `generateShorts(highlightId: string)` → POST + - `fetchShorts(highlightId: string)` → GET list + - `getShortDownloadUrl(shortId: string)` → GET download URL + - Types: `GeneratedShort`, `ShortsListResponse` +4. Update `frontend/src/pages/HighlightQueue.tsx`: + - Add "Generate Shorts" button on approved highlights (only when no shorts exist or all failed) + - Show generation status badges per-preset (pending/processing/complete/failed) + - Show download links for completed shorts + - Poll for status updates while any shorts are processing (5s interval) + - Add shorts state to component (Map) +5. Update `frontend/src/pages/HighlightQueue.module.css` with styles for generate button, status badges, download links. 
+ +## Must-Haves + +- [ ] Three API endpoints: generate trigger, list, download +- [ ] Router registered in main.py +- [ ] Frontend API client with types +- [ ] Generate button visible only on approved highlights +- [ ] Status badges show per-preset state +- [ ] Download links for completed shorts +- [ ] Polling while shorts are processing + +## Verification + +- `grep -q 'shorts' backend/main.py` — router registered +- `cd backend && python -c "from routers.shorts import router; print('router OK')"` — router imports +- `cd frontend && npx tsc --noEmit` — TypeScript compiles +- `cd frontend && npm run build` — frontend builds + +## Inputs + +- `backend/models.py` — GeneratedShort, FormatPreset, ShortStatus from T01 +- `backend/pipeline/stages.py` — stage_generate_shorts task from T02 +- `backend/minio_client.py` — generate_download_url function +- `backend/main.py` — existing router registration pattern +- `frontend/src/pages/HighlightQueue.tsx` — existing highlight queue UI +- `frontend/src/pages/HighlightQueue.module.css` — existing styles +- `frontend/src/api/highlights.ts` — existing API client pattern + +## Expected Output + +- `backend/routers/shorts.py` — new router file +- `backend/main.py` — updated with shorts router +- `frontend/src/api/shorts.ts` — new API client +- `frontend/src/pages/HighlightQueue.tsx` — updated with generate button and status +- `frontend/src/pages/HighlightQueue.module.css` — updated with new styles + +## Inputs + +- `backend/models.py` +- `backend/pipeline/stages.py` +- `backend/minio_client.py` +- `backend/main.py` +- `frontend/src/pages/HighlightQueue.tsx` +- `frontend/src/pages/HighlightQueue.module.css` +- `frontend/src/api/highlights.ts` + +## Expected Output + +- `backend/routers/shorts.py` +- `backend/main.py` +- `frontend/src/api/shorts.ts` +- `frontend/src/pages/HighlightQueue.tsx` +- `frontend/src/pages/HighlightQueue.module.css` + +## Verification + +grep -q 'shorts' backend/main.py && cd backend && python -c "from 
routers.shorts import router; print('OK')" && cd ../frontend && npx tsc --noEmit diff --git a/alembic/versions/025_add_generated_shorts.py b/alembic/versions/025_add_generated_shorts.py new file mode 100644 index 0000000..156af79 --- /dev/null +++ b/alembic/versions/025_add_generated_shorts.py @@ -0,0 +1,45 @@ +"""Add generated_shorts table with format_preset and short_status enums. + +Revision ID: 025_add_generated_shorts +Revises: 024_add_posts_and_attachments +""" + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from alembic import op + +revision = "025_add_generated_shorts" +down_revision = "024_add_posts_and_attachments" +branch_labels = None +depends_on = None + +format_preset_enum = sa.Enum("vertical", "square", "horizontal", name="format_preset") +short_status_enum = sa.Enum("pending", "processing", "complete", "failed", name="short_status") + + +def upgrade() -> None: + format_preset_enum.create(op.get_bind(), checkfirst=True) + short_status_enum.create(op.get_bind(), checkfirst=True) + + op.create_table( + "generated_shorts", + sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.func.gen_random_uuid()), + sa.Column("highlight_candidate_id", UUID(as_uuid=True), sa.ForeignKey("highlight_candidates.id", ondelete="CASCADE"), nullable=False, index=True), + sa.Column("format_preset", format_preset_enum, nullable=False), + sa.Column("minio_object_key", sa.String(1000), nullable=True), + sa.Column("duration_secs", sa.Float, nullable=True), + sa.Column("width", sa.Integer, nullable=False), + sa.Column("height", sa.Integer, nullable=False), + sa.Column("file_size_bytes", sa.BigInteger, nullable=True), + sa.Column("status", short_status_enum, nullable=False, server_default="pending"), + sa.Column("error_message", sa.Text, nullable=True), + sa.Column("created_at", sa.DateTime, nullable=False, server_default=sa.func.now()), + sa.Column("updated_at", sa.DateTime, nullable=False, server_default=sa.func.now()), + ) + + 
+def downgrade() -> None: + op.drop_table("generated_shorts") + short_status_enum.drop(op.get_bind(), checkfirst=True) + format_preset_enum.drop(op.get_bind(), checkfirst=True) diff --git a/backend/config.py b/backend/config.py index 11e963e..5336275 100644 --- a/backend/config.py +++ b/backend/config.py @@ -81,6 +81,7 @@ class Settings(BaseSettings): # File storage transcript_storage_path: str = "/data/transcripts" video_metadata_path: str = "/data/video_meta" + video_source_path: str = "/videos" # Git commit SHA (set at Docker build time or via env var) git_commit_sha: str = "unknown" diff --git a/backend/models.py b/backend/models.py index 94df94f..1d77731 100644 --- a/backend/models.py +++ b/backend/models.py @@ -814,3 +814,55 @@ class PostAttachment(Base): # relationships post: Mapped[Post] = sa_relationship(back_populates="attachments") + + +# ── Shorts Generation ──────────────────────────────────────────────────────── + +class FormatPreset(str, enum.Enum): + """Output format presets for generated shorts.""" + vertical = "vertical" # 9:16 (1080x1920) + square = "square" # 1:1 (1080x1080) + horizontal = "horizontal" # 16:9 (1920x1080) + + +class ShortStatus(str, enum.Enum): + """Processing status for a generated short.""" + pending = "pending" + processing = "processing" + complete = "complete" + failed = "failed" + + +class GeneratedShort(Base): + """A video short generated from a highlight candidate in a specific format.""" + __tablename__ = "generated_shorts" + + id: Mapped[uuid.UUID] = _uuid_pk() + highlight_candidate_id: Mapped[uuid.UUID] = mapped_column( + ForeignKey("highlight_candidates.id", ondelete="CASCADE"), + nullable=False, index=True, + ) + format_preset: Mapped[FormatPreset] = mapped_column( + Enum(FormatPreset, name="format_preset", create_constraint=True), + nullable=False, + ) + minio_object_key: Mapped[str | None] = mapped_column(String(1000), nullable=True) + duration_secs: Mapped[float | None] = mapped_column(Float, nullable=True) + 
width: Mapped[int] = mapped_column(Integer, nullable=False) + height: Mapped[int] = mapped_column(Integer, nullable=False) + file_size_bytes: Mapped[int | None] = mapped_column(BigInteger, nullable=True) + status: Mapped[ShortStatus] = mapped_column( + Enum(ShortStatus, name="short_status", create_constraint=True), + default=ShortStatus.pending, + server_default="pending", + ) + error_message: Mapped[str | None] = mapped_column(Text, nullable=True) + created_at: Mapped[datetime] = mapped_column( + default=_now, server_default=func.now() + ) + updated_at: Mapped[datetime] = mapped_column( + default=_now, server_default=func.now(), onupdate=_now + ) + + # relationships + highlight_candidate: Mapped[HighlightCandidate] = sa_relationship() diff --git a/docker-compose.yml b/docker-compose.yml index a4e8327..5d16c98 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -125,6 +125,7 @@ services: volumes: - /vmPool/r/services/chrysopedia_data:/data - ./config:/config:ro + - /vmPool/r/services/chrysopedia_videos:/videos:ro depends_on: chrysopedia-db: condition: service_healthy @@ -165,6 +166,7 @@ services: - /vmPool/r/services/chrysopedia_data:/data - ./prompts:/prompts:ro - ./config:/config:ro + - /vmPool/r/services/chrysopedia_videos:/videos:ro depends_on: chrysopedia-db: condition: service_healthy diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api index ba9a122..7dd27ea 100644 --- a/docker/Dockerfile.api +++ b/docker/Dockerfile.api @@ -4,7 +4,7 @@ WORKDIR /app # System deps RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc libpq-dev curl \ + gcc libpq-dev curl ffmpeg \ && rm -rf /var/lib/apt/lists/* # Python deps (cached layer)