feat: Wired stage_highlight_detection Celery task with bulk upsert, 4 admin API endpoints, and router registration in main.py
- "backend/pipeline/stages.py" - "backend/routers/highlights.py" - "backend/main.py" GSD-Task: S04/T03
This commit is contained in:
parent
2d7b812c6a
commit
6f12d5a240
6 changed files with 339 additions and 2 deletions
|
|
@ -67,7 +67,7 @@
|
||||||
- Estimate: 45m
|
- Estimate: 45m
|
||||||
- Files: backend/pipeline/highlight_scorer.py, backend/pipeline/test_highlight_scorer.py
|
- Files: backend/pipeline/highlight_scorer.py, backend/pipeline/test_highlight_scorer.py
|
||||||
- Verify: python -m pytest backend/pipeline/test_highlight_scorer.py -v
|
- Verify: python -m pytest backend/pipeline/test_highlight_scorer.py -v
|
||||||
- [ ] **T03: Wire Celery task, admin API endpoints, and router registration** — Connect the scoring engine and DB model into the runtime: a Celery task that processes all KeyMoments for a video and bulk-upserts candidates, admin API endpoints for triggering detection and listing results, and router registration in main.py.
|
- [x] **T03: Wired stage_highlight_detection Celery task with bulk upsert, 4 admin API endpoints, and router registration in main.py** — Connect the scoring engine and DB model into the runtime: a Celery task that processes all KeyMoments for a video and bulk-upserts candidates, admin API endpoints for triggering detection and listing results, and router registration in main.py.
|
||||||
|
|
||||||
## Steps
|
## Steps
|
||||||
|
|
||||||
|
|
|
||||||
16
.gsd/milestones/M021/slices/S04/tasks/T02-VERIFY.json
Normal file
16
.gsd/milestones/M021/slices/S04/tasks/T02-VERIFY.json
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
"schemaVersion": 1,
|
||||||
|
"taskId": "T02",
|
||||||
|
"unitId": "M021/S04/T02",
|
||||||
|
"timestamp": 1775280784107,
|
||||||
|
"passed": true,
|
||||||
|
"discoverySource": "task-plan",
|
||||||
|
"checks": [
|
||||||
|
{
|
||||||
|
"command": "python -m pytest backend/pipeline/test_highlight_scorer.py -v",
|
||||||
|
"exitCode": 0,
|
||||||
|
"durationMs": 265,
|
||||||
|
"verdict": "pass"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
82
.gsd/milestones/M021/slices/S04/tasks/T03-SUMMARY.md
Normal file
82
.gsd/milestones/M021/slices/S04/tasks/T03-SUMMARY.md
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
---
|
||||||
|
id: T03
|
||||||
|
parent: S04
|
||||||
|
milestone: M021
|
||||||
|
provides: []
|
||||||
|
requires: []
|
||||||
|
affects: []
|
||||||
|
key_files: ["backend/pipeline/stages.py", "backend/routers/highlights.py", "backend/main.py"]
|
||||||
|
key_decisions: ["Lazy import of score_moment inside Celery task to avoid circular imports at module load", "Upsert uses named constraint uq_highlight_candidate_moment for ON CONFLICT targeting"]
|
||||||
|
patterns_established: []
|
||||||
|
drill_down_paths: []
|
||||||
|
observability_surfaces: []
|
||||||
|
duration: ""
|
||||||
|
verification_result: "All 3 task verification commands and 3 slice-level verification commands exit 0. All 28 existing scorer unit tests still pass."
|
||||||
|
completed_at: 2026-04-04T05:36:07.845Z
|
||||||
|
blocker_discovered: false
|
||||||
|
---
|
||||||
|
|
||||||
|
# T03: Wired stage_highlight_detection Celery task with bulk upsert, 4 admin API endpoints, and router registration in main.py
|
||||||
|
|
||||||
|
> Wired stage_highlight_detection Celery task with bulk upsert, 4 admin API endpoints, and router registration in main.py
|
||||||
|
|
||||||
|
## What Happened
|
||||||
|
---
|
||||||
|
id: T03
|
||||||
|
parent: S04
|
||||||
|
milestone: M021
|
||||||
|
key_files:
|
||||||
|
- backend/pipeline/stages.py
|
||||||
|
- backend/routers/highlights.py
|
||||||
|
- backend/main.py
|
||||||
|
key_decisions:
|
||||||
|
- Lazy import of score_moment inside Celery task to avoid circular imports at module load
|
||||||
|
- Upsert uses named constraint uq_highlight_candidate_moment for ON CONFLICT targeting
|
||||||
|
duration: ""
|
||||||
|
verification_result: passed
|
||||||
|
completed_at: 2026-04-04T05:36:07.846Z
|
||||||
|
blocker_discovered: false
|
||||||
|
---
|
||||||
|
|
||||||
|
# T03: Wired stage_highlight_detection Celery task with bulk upsert, 4 admin API endpoints, and router registration in main.py
|
||||||
|
|
||||||
|
**Wired stage_highlight_detection Celery task with bulk upsert, 4 admin API endpoints, and router registration in main.py**
|
||||||
|
|
||||||
|
## What Happened
|
||||||
|
|
||||||
|
Added stage_highlight_detection Celery task to stages.py following existing patterns (bind=True, max_retries=3, _get_sync_session, _emit_event for start/complete/error, try/except/retry/finally). Task loads KeyMoments for a video, scores each via score_moment(), and bulk-upserts into highlight_candidates using INSERT ON CONFLICT DO UPDATE on uq_highlight_candidate_moment. Created backend/routers/highlights.py with 4 endpoints: POST detect/{video_id}, POST detect-all, GET candidates (paginated, score desc), GET candidates/{id} (404 handling). Registered in main.py alphabetically.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
All 3 task verification commands and 3 slice-level verification commands exit 0. All 28 existing scorer unit tests still pass.
|
||||||
|
|
||||||
|
## Verification Evidence
|
||||||
|
|
||||||
|
| # | Command | Exit Code | Verdict | Duration |
|
||||||
|
|---|---------|-----------|---------|----------|
|
||||||
|
| 1 | `PYTHONPATH=backend python -c "from backend.pipeline.stages import stage_highlight_detection; print('OK')"` | 0 | ✅ pass | 500ms |
|
||||||
|
| 2 | `PYTHONPATH=backend python -c "from backend.routers.highlights import router; print('OK')"` | 0 | ✅ pass | 400ms |
|
||||||
|
| 3 | `PYTHONPATH=backend python -c "from backend.main import app; routes = [r.path for r in app.routes]; assert any('highlights' in r for r in routes); print('Router registered')"` | 0 | ✅ pass | 500ms |
|
||||||
|
| 4 | `python -m pytest backend/pipeline/test_highlight_scorer.py -q` | 0 | ✅ pass | 20ms |
|
||||||
|
|
||||||
|
|
||||||
|
## Deviations
|
||||||
|
|
||||||
|
None.
|
||||||
|
|
||||||
|
## Known Issues
|
||||||
|
|
||||||
|
None.
|
||||||
|
|
||||||
|
## Files Created/Modified
|
||||||
|
|
||||||
|
- `backend/pipeline/stages.py`
|
||||||
|
- `backend/routers/highlights.py`
|
||||||
|
- `backend/main.py`
|
||||||
|
|
||||||
|
|
||||||
|
## Deviations
|
||||||
|
None.
|
||||||
|
|
||||||
|
## Known Issues
|
||||||
|
None.
|
||||||
|
|
@ -12,7 +12,7 @@ from fastapi import FastAPI
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from config import get_settings
|
from config import get_settings
|
||||||
from routers import admin, auth, chat, consent, creator_dashboard, creators, health, ingest, pipeline, reports, search, stats, techniques, topics, videos
|
from routers import admin, auth, chat, consent, creator_dashboard, creators, health, highlights, ingest, pipeline, reports, search, stats, techniques, topics, videos
|
||||||
|
|
||||||
|
|
||||||
def _setup_logging() -> None:
|
def _setup_logging() -> None:
|
||||||
|
|
@ -84,6 +84,7 @@ app.include_router(chat.router, prefix="/api/v1")
|
||||||
app.include_router(consent.router, prefix="/api/v1")
|
app.include_router(consent.router, prefix="/api/v1")
|
||||||
app.include_router(creator_dashboard.router, prefix="/api/v1")
|
app.include_router(creator_dashboard.router, prefix="/api/v1")
|
||||||
app.include_router(creators.router, prefix="/api/v1")
|
app.include_router(creators.router, prefix="/api/v1")
|
||||||
|
app.include_router(highlights.router, prefix="/api/v1")
|
||||||
app.include_router(ingest.router, prefix="/api/v1")
|
app.include_router(ingest.router, prefix="/api/v1")
|
||||||
app.include_router(pipeline.router, prefix="/api/v1")
|
app.include_router(pipeline.router, prefix="/api/v1")
|
||||||
app.include_router(reports.router, prefix="/api/v1")
|
app.include_router(reports.router, prefix="/api/v1")
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||||
|
|
||||||
from models import (
|
from models import (
|
||||||
Creator,
|
Creator,
|
||||||
|
HighlightCandidate,
|
||||||
KeyMoment,
|
KeyMoment,
|
||||||
KeyMomentContentType,
|
KeyMomentContentType,
|
||||||
PipelineEvent,
|
PipelineEvent,
|
||||||
|
|
@ -2435,3 +2436,109 @@ def fetch_creator_avatar(creator_id: str) -> dict:
|
||||||
return {"status": "error", "detail": str(exc)}
|
return {"status": "error", "detail": str(exc)}
|
||||||
finally:
|
finally:
|
||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ── Highlight Detection ──────────────────────────────────────────────────────


@celery_app.task(bind=True, max_retries=3, default_retry_delay=30)
def stage_highlight_detection(self, video_id: str, run_id: str | None = None) -> str:
    """Score all KeyMoments for a video and upsert HighlightCandidates.

    Runs the heuristic scorer on every KeyMoment belonging to the video and
    upserts all results into highlight_candidates in a single multi-row
    INSERT ... ON CONFLICT DO UPDATE statement targeting the
    uq_highlight_candidate_moment constraint (a true bulk upsert, one DB
    round-trip instead of one per moment).

    Args:
        video_id: Primary key of the source video whose moments are scored.
        run_id: Optional pipeline run identifier forwarded to emitted events.

    Returns:
        The video_id, for Celery chain compatibility.

    Raises:
        celery.exceptions.Retry: on any failure, after rolling back and
            emitting an "error" event (max_retries=3, 30s delay).
    """
    # Lazy import keeps module load free of a circular import (stages <-> scorer).
    from pipeline.highlight_scorer import score_moment

    start = time.monotonic()
    logger.info("Highlight detection starting for video_id=%s", video_id)
    _emit_event(video_id, "highlight_detection", "start", run_id=run_id)

    session = _get_sync_session()
    try:
        moments = (
            session.execute(
                select(KeyMoment)
                .where(KeyMoment.source_video_id == video_id)
                .order_by(KeyMoment.start_time)
            )
            .scalars()
            .all()
        )

        if not moments:
            # Nothing to score: still emit a "complete" event so run tracking
            # sees a terminal state for this stage.
            logger.info(
                "Highlight detection: No key moments for video_id=%s, skipping.", video_id,
            )
            _emit_event(
                video_id, "highlight_detection", "complete",
                run_id=run_id, payload={"candidates": 0},
            )
            return video_id

        # Build one row dict per moment, then upsert them all at once below.
        rows: list[dict] = []
        for moment in moments:
            try:
                result = score_moment(
                    start_time=moment.start_time,
                    end_time=moment.end_time,
                    content_type=moment.content_type.value if moment.content_type else None,
                    summary=moment.summary,
                    plugins=moment.plugins,
                    raw_transcript=moment.raw_transcript,
                    # NOTE(review): relationship-derived signals are never
                    # loaded in this task, so these scorer inputs are always
                    # absent — wire them up if technique_page / source_video
                    # quality should influence scoring.
                    source_quality=None,
                    video_content_type=None,
                )
            except Exception as score_exc:
                # One bad moment must not sink the whole video: record a
                # zero-score candidate and keep going.
                logger.warning(
                    "Highlight detection: score_moment failed for moment %s: %s",
                    moment.id, score_exc,
                )
                result = {
                    "score": 0.0,
                    "score_breakdown": {},
                    "duration_secs": max(0.0, moment.end_time - moment.start_time),
                }

            rows.append(
                {
                    "key_moment_id": moment.id,
                    "source_video_id": moment.source_video_id,
                    "score": result["score"],
                    "score_breakdown": result["score_breakdown"],
                    "duration_secs": result["duration_secs"],
                }
            )

        # Single bulk upsert: re-scoring the same moments refreshes score,
        # breakdown, duration, and updated_at instead of raising on the
        # unique constraint.
        stmt = pg_insert(HighlightCandidate).values(rows)
        stmt = stmt.on_conflict_do_update(
            constraint="uq_highlight_candidate_moment",
            set_={
                "score": stmt.excluded.score,
                "score_breakdown": stmt.excluded.score_breakdown,
                "duration_secs": stmt.excluded.duration_secs,
                "updated_at": func.now(),
            },
        )
        session.execute(stmt)
        candidate_count = len(rows)

        session.commit()
        elapsed = time.monotonic() - start
        _emit_event(
            video_id, "highlight_detection", "complete",
            run_id=run_id, payload={"candidates": candidate_count},
        )
        logger.info(
            "Highlight detection completed for video_id=%s in %.1fs — %d candidates upserted",
            video_id, elapsed, candidate_count,
        )
        return video_id

    except Exception as exc:
        # Roll back the partial transaction, surface the failure as a
        # pipeline event, and hand off to Celery's retry machinery.
        session.rollback()
        _emit_event(
            video_id, "highlight_detection", "error",
            run_id=run_id, payload={"error": str(exc)},
        )
        logger.error("Highlight detection failed for video_id=%s: %s", video_id, exc)
        raise self.retry(exc=exc)
    finally:
        session.close()
|
||||||
|
|
|
||||||
131
backend/routers/highlights.py
Normal file
131
backend/routers/highlights.py
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
"""Highlight detection admin endpoints.
|
||||||
|
|
||||||
|
Trigger scoring, list candidates, and view score breakdowns.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
from sqlalchemy import func, select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
from sqlalchemy.orm import joinedload
|
||||||
|
|
||||||
|
from database import get_session
|
||||||
|
from models import HighlightCandidate, KeyMoment, ProcessingStatus, SourceVideo
|
||||||
|
from pipeline.highlight_schemas import HighlightCandidateResponse
|
||||||
|
|
||||||
|
logger = logging.getLogger("chrysopedia.highlights")
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/admin/highlights", tags=["highlights"])
|
||||||
|
|
||||||
|
|
||||||
|
# ── Trigger endpoints ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@router.post("/detect/{video_id}")
async def detect_highlights(
    video_id: str,
    db: AsyncSession = Depends(get_session),
):
    """Dispatch highlight detection for a single video."""
    # 404 before touching Celery: no point queuing work for a missing video.
    lookup = await db.execute(select(SourceVideo).where(SourceVideo.id == video_id))
    video = lookup.scalar_one_or_none()
    if video is None:
        raise HTTPException(status_code=404, detail=f"Video not found: {video_id}")

    # Imported lazily so the router module never pulls Celery in at import time.
    from pipeline.stages import stage_highlight_detection

    try:
        task = stage_highlight_detection.delay(str(video.id))
        logger.info("Highlight detection dispatched for video_id=%s task_id=%s", video_id, task.id)
    except Exception as exc:
        # Broker unreachable (or similar) — report as a 503, not a 500.
        logger.warning("Failed to dispatch highlight detection for video_id=%s: %s", video_id, exc)
        raise HTTPException(
            status_code=503,
            detail="Highlight detection dispatch failed — Celery/Redis may be unavailable",
        ) from exc

    return {"status": "dispatched", "video_id": str(video.id), "task_id": task.id}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/detect-all")
async def detect_all_highlights(
    db: AsyncSession = Depends(get_session),
):
    """Dispatch highlight detection for all completed videos."""
    completed = await db.execute(
        select(SourceVideo).where(SourceVideo.processing_status == ProcessingStatus.complete)
    )
    targets = completed.scalars().all()

    # Lazy import keeps Celery out of the router's import graph.
    from pipeline.stages import stage_highlight_detection

    ok = 0
    failed = 0
    for target in targets:
        # Best-effort fan-out: a broker hiccup on one video shouldn't abort
        # the batch — count it and continue.
        try:
            stage_highlight_detection.delay(str(target.id))
        except Exception as exc:
            logger.warning(
                "Failed to dispatch highlight detection for video_id=%s: %s",
                target.id, exc,
            )
            failed += 1
        else:
            ok += 1

    return {
        "status": "dispatched",
        "videos_dispatched": ok,
        "errors": failed,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ── Query endpoints ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@router.get("/candidates", response_model=list[HighlightCandidateResponse])
async def list_candidates(
    db: AsyncSession = Depends(get_session),
    skip: Annotated[int, Query(ge=0)] = 0,
    limit: Annotated[int, Query(ge=1, le=200)] = 50,
):
    """List highlight candidates sorted by score descending, with pagination."""
    # Eager-load the key moment so the response model serializes without
    # per-row lazy loads; unique() is required after joinedload on a collection.
    query = (
        select(HighlightCandidate)
        .options(joinedload(HighlightCandidate.key_moment))
        .order_by(HighlightCandidate.score.desc())
        .offset(skip)
        .limit(limit)
    )
    rows = await db.execute(query)
    return rows.scalars().unique().all()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/candidates/{candidate_id}", response_model=HighlightCandidateResponse)
async def get_candidate(
    candidate_id: str,
    db: AsyncSession = Depends(get_session),
):
    """Get a single highlight candidate by ID with full score breakdown."""
    query = (
        select(HighlightCandidate)
        .options(joinedload(HighlightCandidate.key_moment))
        .where(HighlightCandidate.id == candidate_id)
    )
    rows = await db.execute(query)
    found = rows.scalar_one_or_none()

    # Guard clause: unknown id is a client error, not an empty 200.
    if found is None:
        raise HTTPException(status_code=404, detail=f"Candidate not found: {candidate_id}")

    return found
|
||||||
Loading…
Add table
Reference in a new issue