feat: Wired word-timing extraction into stage_highlight_detection — 62…

- "backend/pipeline/stages.py" - ".gsd/KNOWLEDGE.md" GSD-Task: S05/T02
2026-04-04 08:11:32 +00:00 · 2026-04-04 08:11:32 +00:00 · 7cc9497f3c
commit 7cc9497f3c
parent 52df9c0dc2
1 changed file with 52 additions and 2 deletions
--- a/backend/pipeline/stages.py
+++ b/backend/pipeline/stages.py
@@ -12,6 +12,7 @@ from __future__ import annotations
 import hashlib
 import json
 import logging
+import os
 import re
 import subprocess
 import time
@@ -2449,7 +2450,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->

    Returns the video_id for chain compatibility.
    """
-    from pipeline.highlight_scorer import score_moment
+    from pipeline.highlight_scorer import extract_word_timings, score_moment

    start = time.monotonic()
    logger.info("Highlight detection starting for video_id=%s", video_id)
@@ -2457,6 +2458,47 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->

    session = _get_sync_session()
    try:
+        # ------------------------------------------------------------------
+        # Load transcript data once for the entire video (word-level timing)
+        # ------------------------------------------------------------------
+        transcript_data: list | None = None
+        source_video = session.execute(
+            select(SourceVideo).where(SourceVideo.id == video_id)
+        ).scalar_one_or_none()
+
+        if source_video and source_video.transcript_path:
+            transcript_file = source_video.transcript_path
+            try:
+                with open(transcript_file, "r") as fh:
+                    raw = json.load(fh)
+                # Accept {"segments": [...]}, {"results": [...]}, or a bare [...]
+                if isinstance(raw, dict):
+                    transcript_data = raw.get("segments", raw.get("results", []))
+                elif isinstance(raw, list):
+                    transcript_data = raw
+                else:
+                    transcript_data = None
+                if transcript_data:
+                    logger.info(
+                        "Loaded transcript for video_id=%s (%d segments)",
+                        video_id, len(transcript_data),
+                    )
+            except FileNotFoundError:
+                logger.warning(
+                    "Transcript file not found for video_id=%s: %s",
+                    video_id, transcript_file,
+                )
+            except (json.JSONDecodeError, OSError) as io_exc:
+                logger.warning(
+                    "Failed to load transcript for video_id=%s: %s",
+                    video_id, io_exc,
+                )
+        else:
+            logger.info(
+                "No transcript_path for video_id=%s — audio proxy signals will be neutral",
+                video_id,
+            )
+
        moments = (
            session.execute(
                select(KeyMoment)
@@ -2480,6 +2522,13 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
        candidate_count = 0
        for moment in moments:
            try:
+                # Extract word-level timings for this moment's window
+                word_timings = None
+                if transcript_data:
+                    word_timings = extract_word_timings(
+                        transcript_data, moment.start_time, moment.end_time,
+                    ) or None  # empty list → None for neutral fallback
+
                result = score_moment(
                    start_time=moment.start_time,
                    end_time=moment.end_time,
@@ -2489,6 +2538,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
                    raw_transcript=moment.raw_transcript,
                    source_quality=None,  # filled below if technique_page loaded
                    video_content_type=None,  # filled below if source_video loaded
+                    word_timings=word_timings,
                )
            except Exception as score_exc:
                logger.warning(
@@ -2509,7 +2559,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
                duration_secs=result["duration_secs"],
            )
            stmt = stmt.on_conflict_do_update(
-                constraint="uq_highlight_candidate_moment",
+                constraint="highlight_candidates_key_moment_id_key",
                set_={
                    "score": stmt.excluded.score,
                    "score_breakdown": stmt.excluded.score_breakdown,