feat: Wired word-timing extraction into stage_highlight_detection — 62…

- "backend/pipeline/stages.py" - ".gsd/KNOWLEDGE.md" GSD-Task: S05/T02
2026-04-04 08:11:32 +00:00 · 2026-04-04 08:11:32 +00:00 · 7cc9497f3c
commit 7cc9497f3c
parent 52df9c0dc2
1 changed file with 52 additions and 2 deletions
--- a/backend/pipeline/stages.py
+++ b/backend/pipeline/stages.py
@@ -12,6 +12,7 @@ from __future__ import annotations
 import hashlib
 import json
 import logging
 import os
 import re
 import subprocess
 import time
@@ -2449,7 +2450,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
    Returns the video_id for chain compatibility.
    """
-    from pipeline.highlight_scorer import score_moment
+    from pipeline.highlight_scorer import extract_word_timings, score_moment
    start = time.monotonic()
    logger.info("Highlight detection starting for video_id=%s", video_id)
@@ -2457,6 +2458,47 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
    session = _get_sync_session()
    try:
        # ------------------------------------------------------------------
        # Load transcript data once for the entire video (word-level timing)
        # ------------------------------------------------------------------
        transcript_data: list | None = None
        source_video = session.execute(
            select(SourceVideo).where(SourceVideo.id == video_id)
        ).scalar_one_or_none()
        if source_video and source_video.transcript_path:
            transcript_file = source_video.transcript_path
            try:
                with open(transcript_file, "r") as fh:
                    raw = json.load(fh)
                # Accept both {"segments": [...]} and bare [...]
                if isinstance(raw, dict):
                    transcript_data = raw.get("segments", raw.get("results", []))
                elif isinstance(raw, list):
                    transcript_data = raw
                else:
                    transcript_data = None
                if transcript_data:
                    logger.info(
                        "Loaded transcript for video_id=%s (%d segments)",
                        video_id, len(transcript_data),
                    )
            except FileNotFoundError:
                logger.warning(
                    "Transcript file not found for video_id=%s: %s",
                    video_id, transcript_file,
                )
            except (json.JSONDecodeError, OSError) as io_exc:
                logger.warning(
                    "Failed to load transcript for video_id=%s: %s",
                    video_id, io_exc,
                )
        else:
            logger.info(
                "No transcript_path for video_id=%s — audio proxy signals will be neutral",
                video_id,
            )
        moments = (
            session.execute(
                select(KeyMoment)
@@ -2480,6 +2522,13 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
        candidate_count = 0
        for moment in moments:
            try:
                # Extract word-level timings for this moment's window
                word_timings = None
                if transcript_data:
                    word_timings = extract_word_timings(
                        transcript_data, moment.start_time, moment.end_time,
                    ) or None  # empty list → None for neutral fallback
                result = score_moment(
                    start_time=moment.start_time,
                    end_time=moment.end_time,
@@ -2489,6 +2538,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
                    raw_transcript=moment.raw_transcript,
                    source_quality=None,  # filled below if technique_page loaded
                    video_content_type=None,  # filled below if source_video loaded
                    word_timings=word_timings,
                )
            except Exception as score_exc:
                logger.warning(
@@ -2509,7 +2559,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
                duration_secs=result["duration_secs"],
            )
            stmt = stmt.on_conflict_do_update(
-                constraint="uq_highlight_candidate_moment",
+                constraint="highlight_candidates_key_moment_id_key",
                set_={
                    "score": stmt.excluded.score,
                    "score_breakdown": stmt.excluded.score_breakdown,