feat: Wired word-timing extraction into stage_highlight_detection — 62…
- "backend/pipeline/stages.py" - ".gsd/KNOWLEDGE.md" GSD-Task: S05/T02
This commit is contained in:
parent
52df9c0dc2
commit
7cc9497f3c
1 changed files with 52 additions and 2 deletions
|
|
@ -12,6 +12,7 @@ from __future__ import annotations
|
|||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
|
|
@ -2449,7 +2450,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
|||
|
||||
Returns the video_id for chain compatibility.
|
||||
"""
|
||||
from pipeline.highlight_scorer import score_moment
|
||||
from pipeline.highlight_scorer import extract_word_timings, score_moment
|
||||
|
||||
start = time.monotonic()
|
||||
logger.info("Highlight detection starting for video_id=%s", video_id)
|
||||
|
|
@ -2457,6 +2458,47 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
|||
|
||||
session = _get_sync_session()
|
||||
try:
|
||||
# ------------------------------------------------------------------
|
||||
# Load transcript data once for the entire video (word-level timing)
|
||||
# ------------------------------------------------------------------
|
||||
transcript_data: list | None = None
|
||||
source_video = session.execute(
|
||||
select(SourceVideo).where(SourceVideo.id == video_id)
|
||||
).scalar_one_or_none()
|
||||
|
||||
if source_video and source_video.transcript_path:
|
||||
transcript_file = source_video.transcript_path
|
||||
try:
|
||||
with open(transcript_file, "r") as fh:
|
||||
raw = json.load(fh)
|
||||
# Accept {"segments": [...]}, {"results": [...]} (fallback key), and bare [...]
|
||||
if isinstance(raw, dict):
|
||||
transcript_data = raw.get("segments", raw.get("results", []))
|
||||
elif isinstance(raw, list):
|
||||
transcript_data = raw
|
||||
else:
|
||||
transcript_data = None
|
||||
if transcript_data:
|
||||
logger.info(
|
||||
"Loaded transcript for video_id=%s (%d segments)",
|
||||
video_id, len(transcript_data),
|
||||
)
|
||||
except FileNotFoundError:
|
||||
logger.warning(
|
||||
"Transcript file not found for video_id=%s: %s",
|
||||
video_id, transcript_file,
|
||||
)
|
||||
except (json.JSONDecodeError, OSError) as io_exc:
|
||||
logger.warning(
|
||||
"Failed to load transcript for video_id=%s: %s",
|
||||
video_id, io_exc,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"No transcript_path for video_id=%s — audio proxy signals will be neutral",
|
||||
video_id,
|
||||
)
|
||||
|
||||
moments = (
|
||||
session.execute(
|
||||
select(KeyMoment)
|
||||
|
|
@ -2480,6 +2522,13 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
|||
candidate_count = 0
|
||||
for moment in moments:
|
||||
try:
|
||||
# Extract word-level timings for this moment's window
|
||||
word_timings = None
|
||||
if transcript_data:
|
||||
word_timings = extract_word_timings(
|
||||
transcript_data, moment.start_time, moment.end_time,
|
||||
) or None # empty list → None for neutral fallback
|
||||
|
||||
result = score_moment(
|
||||
start_time=moment.start_time,
|
||||
end_time=moment.end_time,
|
||||
|
|
@ -2489,6 +2538,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
|||
raw_transcript=moment.raw_transcript,
|
||||
source_quality=None, # filled below if technique_page loaded
|
||||
video_content_type=None, # filled below if source_video loaded
|
||||
word_timings=word_timings,
|
||||
)
|
||||
except Exception as score_exc:
|
||||
logger.warning(
|
||||
|
|
@ -2509,7 +2559,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
|||
duration_secs=result["duration_secs"],
|
||||
)
|
||||
stmt = stmt.on_conflict_do_update(
|
||||
constraint="uq_highlight_candidate_moment",
|
||||
constraint="highlight_candidates_key_moment_id_key",
|
||||
set_={
|
||||
"score": stmt.excluded.score,
|
||||
"score_breakdown": stmt.excluded.score_breakdown,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue