From 7cc9497f3cf08c58da7377b002e44110ac0efff0 Mon Sep 17 00:00:00 2001
From: jlightner <jlightner@users.noreply.github.com>
Date: Sat, 4 Apr 2026 08:11:32 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20Wired=20word-timing=20extraction=20into?=
 =?UTF-8?q?=20stage=5Fhighlight=5Fdetection=20=E2=80=94=2062=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- "backend/pipeline/stages.py"
- ".gsd/KNOWLEDGE.md"

GSD-Task: S05/T02
---
 backend/pipeline/stages.py | 54 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/backend/pipeline/stages.py b/backend/pipeline/stages.py
index 0add68a..5289679 100644
--- a/backend/pipeline/stages.py
+++ b/backend/pipeline/stages.py
@@ -12,6 +12,7 @@ from __future__ import annotations
 import hashlib
 import json
 import logging
+import os
 import re
 import subprocess
 import time
@@ -2449,7 +2450,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
 
     Returns the video_id for chain compatibility.
     """
-    from pipeline.highlight_scorer import score_moment
+    from pipeline.highlight_scorer import extract_word_timings, score_moment
 
     start = time.monotonic()
     logger.info("Highlight detection starting for video_id=%s", video_id)
@@ -2457,6 +2458,47 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
 
     session = _get_sync_session()
     try:
+        # ------------------------------------------------------------------
+        # Load transcript data once for the entire video (word-level timing)
+        # ------------------------------------------------------------------
+        transcript_data: list | None = None
+        source_video = session.execute(
+            select(SourceVideo).where(SourceVideo.id == video_id)
+        ).scalar_one_or_none()
+
+        if source_video and source_video.transcript_path:
+            transcript_file = source_video.transcript_path
+            try:
+                with open(transcript_file, "r") as fh:
+                    raw = json.load(fh)
+                # Accept {"segments": [...]}, {"results": [...]}, or a bare [...]
+                if isinstance(raw, dict):
+                    transcript_data = raw.get("segments", raw.get("results", []))
+                elif isinstance(raw, list):
+                    transcript_data = raw
+                else:
+                    transcript_data = None
+                if transcript_data:
+                    logger.info(
+                        "Loaded transcript for video_id=%s (%d segments)",
+                        video_id, len(transcript_data),
+                    )
+            except FileNotFoundError:
+                logger.warning(
+                    "Transcript file not found for video_id=%s: %s",
+                    video_id, transcript_file,
+                )
+            except (json.JSONDecodeError, OSError) as io_exc:
+                logger.warning(
+                    "Failed to load transcript for video_id=%s: %s",
+                    video_id, io_exc,
+                )
+        else:
+            logger.info(
+                "No transcript_path for video_id=%s — audio proxy signals will be neutral",
+                video_id,
+            )
+
         moments = (
             session.execute(
                 select(KeyMoment)
@@ -2480,6 +2522,13 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
         candidate_count = 0
         for moment in moments:
             try:
+                # Extract word-level timings for this moment's window
+                word_timings = None
+                if transcript_data:
+                    word_timings = extract_word_timings(
+                        transcript_data, moment.start_time, moment.end_time,
+                    ) or None  # empty list → None for neutral fallback
+
                 result = score_moment(
                     start_time=moment.start_time,
                     end_time=moment.end_time,
@@ -2489,6 +2538,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
                     raw_transcript=moment.raw_transcript,
                     source_quality=None,  # filled below if technique_page loaded
                     video_content_type=None,  # filled below if source_video loaded
+                    word_timings=word_timings,
                 )
             except Exception as score_exc:
                 logger.warning(
@@ -2509,7 +2559,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
                 duration_secs=result["duration_secs"],
             )
             stmt = stmt.on_conflict_do_update(
-                constraint="uq_highlight_candidate_moment",
+                constraint="highlight_candidates_key_moment_id_key",
                 set_={
                     "score": stmt.excluded.score,
                     "score_breakdown": stmt.excluded.score_breakdown,