- "backend/pipeline/highlight_scorer.py" - "backend/pipeline/highlight_schemas.py" - "backend/pipeline/test_highlight_scorer.py" GSD-Task: S05/T01
63 lines
2.6 KiB
Python
63 lines
2.6 KiB
Python
"""Pydantic schemas for highlight detection pipeline.
|
|
|
|
Covers scoring breakdown, candidate responses, and batch result summaries.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from datetime import datetime
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class HighlightScoreBreakdown(BaseModel):
    """Per-dimension score breakdown for a highlight candidate.

    Each field is a float in [0, 1] representing the normalized score
    for that scoring dimension.
    """

    # Every dimension carries explicit ``ge=0.0`` / ``le=1.0`` bounds so the
    # [0, 1] contract stated in the docstring is enforced at validation time
    # instead of being documentation only.  This mirrors the bounds already
    # declared on the composite ``score`` of the candidate response schema.

    # --- Required dimensions (no default; must be supplied) ---------------
    duration_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on moment duration (sweet-spot curve)",
    )
    content_density_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on transcript richness / word density",
    )
    technique_relevance_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on content_type and plugin mentions",
    )
    position_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on temporal position within the video",
    )
    uniqueness_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on title/topic distinctness among siblings",
    )
    engagement_proxy_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Proxy engagement signal from summary quality/length",
    )
    plugin_diversity_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Score based on breadth of plugins/tools mentioned",
    )

    # --- Optional dimensions ----------------------------------------------
    # These three fall back to 0.5 (the midpoint of the range) when the
    # caller omits them.
    speech_rate_variance_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Score based on speech rate variation (emphasis shifts) from word timings",
    )
    pause_density_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Score based on strategic pause frequency from word timings",
    )
    speaking_pace_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Score based on words-per-second fitness for teaching pace",
    )
|
|
|
|
|
|
class HighlightCandidateResponse(BaseModel):
    """API response schema for a single highlight candidate."""

    # Identifiers: the candidate itself plus the key moment and source video
    # it refers to.
    id: uuid.UUID
    key_moment_id: uuid.UUID
    source_video_id: uuid.UUID

    # Composite score, constrained to the closed interval [0, 1], together
    # with the per-dimension values it is reported alongside.
    score: float = Field(
        description="Composite highlight score",
        ge=0.0,
        le=1.0,
    )
    score_breakdown: HighlightScoreBreakdown

    # Non-negative duration, in seconds.
    duration_secs: float = Field(
        description="Duration of the key moment in seconds",
        ge=0.0,
    )

    # Free-form string at the schema level; the allowed values are listed in
    # the description but are not validated here.
    status: str = Field(description="One of: candidate, approved, rejected")

    created_at: datetime

    # Enables Pydantic's ``from_attributes`` mode, i.e. building the model
    # from an object's attributes rather than only from mappings.
    # NOTE(review): presumably used with ORM rows — confirm against callers.
    model_config = dict(from_attributes=True)
|
|
|
|
|
|
class HighlightBatchResult(BaseModel):
    """Summary of a highlight scoring batch run for one video."""

    # The video this batch summary belongs to.
    video_id: uuid.UUID

    # Counters are validated as non-negative integers.
    candidates_created: int = Field(
        description="Number of new candidates inserted",
        ge=0,
    )
    candidates_updated: int = Field(
        description="Number of existing candidates re-scored",
        ge=0,
    )

    # Required field, bounded to the closed interval [0, 1].
    top_score: float = Field(
        description="Highest score in this batch",
        ge=0.0,
        le=1.0,
    )
|