chrysopedia/backend/pipeline/highlight_schemas.py
jlightner 27c5f4866b test: Added 3 audio proxy scoring functions, extract_word_timings utili…
- "backend/pipeline/highlight_scorer.py"
- "backend/pipeline/highlight_schemas.py"
- "backend/pipeline/test_highlight_scorer.py"

GSD-Task: S05/T01
2026-04-04 08:05:22 +00:00

63 lines
2.6 KiB
Python

"""Pydantic schemas for highlight detection pipeline.
Covers scoring breakdown, candidate responses, and batch result summaries.
"""
from __future__ import annotations
import uuid
from datetime import datetime
from pydantic import BaseModel, Field
class HighlightScoreBreakdown(BaseModel):
    """Per-dimension score breakdown for a highlight candidate.

    Each field is a float in [0, 1] representing the normalized score
    for that scoring dimension. The range is enforced at validation time,
    so an out-of-range value raises a ``ValidationError`` instead of being
    silently accepted.
    """

    # Dimensions without a default are required.
    duration_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on moment duration (sweet-spot curve)",
    )
    content_density_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on transcript richness / word density",
    )
    technique_relevance_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on content_type and plugin mentions",
    )
    position_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on temporal position within the video",
    )
    uniqueness_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on title/topic distinctness among siblings",
    )
    engagement_proxy_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Proxy engagement signal from summary quality/length",
    )
    plugin_diversity_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on breadth of plugins/tools mentioned",
    )

    # The three word-timing dimensions default to 0.5, so a payload that
    # omits them still validates.
    speech_rate_variance_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Score based on speech rate variation (emphasis shifts) from word timings",
    )
    pause_density_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Score based on strategic pause frequency from word timings",
    )
    speaking_pace_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Score based on words-per-second fitness for teaching pace",
    )
class HighlightCandidateResponse(BaseModel):
    """API response schema for a single highlight candidate."""

    # from_attributes allows validation from arbitrary objects by reading
    # their attributes, not only from dicts.
    model_config = {"from_attributes": True}

    # Identifiers: the candidate itself, then the key moment and source
    # video it refers to.
    id: uuid.UUID
    key_moment_id: uuid.UUID
    source_video_id: uuid.UUID

    # Composite score is constrained to the closed interval [0, 1].
    score: float = Field(
        ge=0.0,
        le=1.0,
        description="Composite highlight score",
    )
    score_breakdown: HighlightScoreBreakdown

    # Duration may be zero but never negative.
    duration_secs: float = Field(
        ge=0.0,
        description="Duration of the key moment in seconds",
    )

    # NOTE(review): the allowed values are listed in the description only;
    # the field accepts any string. Confirm whether callers rely on that
    # before tightening it.
    status: str = Field(
        description="One of: candidate, approved, rejected",
    )
    created_at: datetime
class HighlightBatchResult(BaseModel):
    """Summary of a highlight scoring batch run for one video."""

    video_id: uuid.UUID

    # Both counters are validated as non-negative integers.
    candidates_created: int = Field(
        ge=0,
        description="Number of new candidates inserted",
    )
    candidates_updated: int = Field(
        ge=0,
        description="Number of existing candidates re-scored",
    )

    # Constrained to [0, 1]. The field is required, so a value must be
    # supplied for every batch.
    top_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Highest score in this batch",
    )