feat: Wired word-timing extraction into stage_highlight_detection — 62…
- "backend/pipeline/stages.py"
- ".gsd/KNOWLEDGE.md"

GSD-Task: S05/T02
This commit is contained in:
parent
52df9c0dc2
commit
7cc9497f3c
1 changed file with 52 additions and 2 deletions
|
|
@@ -12,6 +12,7 @@ from __future__ import annotations
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
|
@@ -2449,7 +2450,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
||||||
|
|
||||||
Returns the video_id for chain compatibility.
|
Returns the video_id for chain compatibility.
|
||||||
"""
|
"""
|
||||||
from pipeline.highlight_scorer import score_moment
|
from pipeline.highlight_scorer import extract_word_timings, score_moment
|
||||||
|
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
logger.info("Highlight detection starting for video_id=%s", video_id)
|
logger.info("Highlight detection starting for video_id=%s", video_id)
|
||||||
|
|
@@ -2457,6 +2458,47 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
||||||
|
|
||||||
session = _get_sync_session()
|
session = _get_sync_session()
|
||||||
try:
|
try:
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Load transcript data once for the entire video (word-level timing)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
transcript_data: list | None = None
|
||||||
|
source_video = session.execute(
|
||||||
|
select(SourceVideo).where(SourceVideo.id == video_id)
|
||||||
|
).scalar_one_or_none()
|
||||||
|
|
||||||
|
if source_video and source_video.transcript_path:
|
||||||
|
transcript_file = source_video.transcript_path
|
||||||
|
try:
|
||||||
|
with open(transcript_file, "r") as fh:
|
||||||
|
raw = json.load(fh)
|
||||||
|
# Accept both {"segments": [...]} and bare [...]
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
transcript_data = raw.get("segments", raw.get("results", []))
|
||||||
|
elif isinstance(raw, list):
|
||||||
|
transcript_data = raw
|
||||||
|
else:
|
||||||
|
transcript_data = None
|
||||||
|
if transcript_data:
|
||||||
|
logger.info(
|
||||||
|
"Loaded transcript for video_id=%s (%d segments)",
|
||||||
|
video_id, len(transcript_data),
|
||||||
|
)
|
||||||
|
except FileNotFoundError:
|
||||||
|
logger.warning(
|
||||||
|
"Transcript file not found for video_id=%s: %s",
|
||||||
|
video_id, transcript_file,
|
||||||
|
)
|
||||||
|
except (json.JSONDecodeError, OSError) as io_exc:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to load transcript for video_id=%s: %s",
|
||||||
|
video_id, io_exc,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"No transcript_path for video_id=%s — audio proxy signals will be neutral",
|
||||||
|
video_id,
|
||||||
|
)
|
||||||
|
|
||||||
moments = (
|
moments = (
|
||||||
session.execute(
|
session.execute(
|
||||||
select(KeyMoment)
|
select(KeyMoment)
|
||||||
|
|
@@ -2480,6 +2522,13 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
||||||
candidate_count = 0
|
candidate_count = 0
|
||||||
for moment in moments:
|
for moment in moments:
|
||||||
try:
|
try:
|
||||||
|
# Extract word-level timings for this moment's window
|
||||||
|
word_timings = None
|
||||||
|
if transcript_data:
|
||||||
|
word_timings = extract_word_timings(
|
||||||
|
transcript_data, moment.start_time, moment.end_time,
|
||||||
|
) or None # empty list → None for neutral fallback
|
||||||
|
|
||||||
result = score_moment(
|
result = score_moment(
|
||||||
start_time=moment.start_time,
|
start_time=moment.start_time,
|
||||||
end_time=moment.end_time,
|
end_time=moment.end_time,
|
||||||
|
|
@@ -2489,6 +2538,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
||||||
raw_transcript=moment.raw_transcript,
|
raw_transcript=moment.raw_transcript,
|
||||||
source_quality=None, # filled below if technique_page loaded
|
source_quality=None, # filled below if technique_page loaded
|
||||||
video_content_type=None, # filled below if source_video loaded
|
video_content_type=None, # filled below if source_video loaded
|
||||||
|
word_timings=word_timings,
|
||||||
)
|
)
|
||||||
except Exception as score_exc:
|
except Exception as score_exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|
@@ -2509,7 +2559,7 @@ def stage_highlight_detection(self, video_id: str, run_id: str | None = None) ->
|
||||||
duration_secs=result["duration_secs"],
|
duration_secs=result["duration_secs"],
|
||||||
)
|
)
|
||||||
stmt = stmt.on_conflict_do_update(
|
stmt = stmt.on_conflict_do_update(
|
||||||
constraint="uq_highlight_candidate_moment",
|
constraint="highlight_candidates_key_moment_id_key",
|
||||||
set_={
|
set_={
|
||||||
"score": stmt.excluded.score,
|
"score": stmt.excluded.score,
|
||||||
"score_breakdown": stmt.excluded.score_breakdown,
|
"score_breakdown": stmt.excluded.score_breakdown,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue