# chrysopedia/backend/pipeline/highlight_schemas.py

"""Pydantic schemas for highlight detection pipeline.

Covers scoring breakdown, candidate responses, and batch result summaries.
"""

from __future__ import annotations

import uuid
from datetime import datetime

from pydantic import BaseModel, Field


class HighlightScoreBreakdown(BaseModel):
    """Normalized per-dimension scores behind one highlight candidate.

    Every field holds a float documented to lie in [0, 1], one value per
    scoring dimension.
    """

    # NOTE(review): the [0, 1] range is a documented convention only; no
    # ge/le bounds are declared on these fields, so this schema does not
    # reject out-of-range values.

    # Required dimensions: no default is declared, so each must be supplied.
    duration_score: float = Field(
        description="Score based on moment duration (sweet-spot curve)",
    )
    content_density_score: float = Field(
        description="Score based on transcript richness / word density",
    )
    technique_relevance_score: float = Field(
        description="Score based on content_type and plugin mentions",
    )
    position_score: float = Field(
        description="Score based on temporal position within the video",
    )
    uniqueness_score: float = Field(
        description="Score based on title/topic distinctness among siblings",
    )
    engagement_proxy_score: float = Field(
        description="Proxy engagement signal from summary quality/length",
    )
    plugin_diversity_score: float = Field(
        description="Score based on breadth of plugins/tools mentioned",
    )

    # Optional dimensions: each falls back to 0.5 when omitted.
    speech_rate_variance_score: float = Field(
        description="Score based on speech rate variation (emphasis shifts) from word timings",
        default=0.5,
    )
    pause_density_score: float = Field(
        description="Score based on strategic pause frequency from word timings",
        default=0.5,
    )
    speaking_pace_score: float = Field(
        description="Score based on words-per-second fitness for teaching pace",
        default=0.5,
    )


class HighlightCandidateResponse(BaseModel):
    """Response payload describing one highlight candidate."""

    # Lets the model be built from an object's attributes (for example an
    # ORM row) rather than only from a mapping.
    model_config = {"from_attributes": True}

    # Identifiers of the candidate, its key moment, and the source video.
    id: uuid.UUID
    key_moment_id: uuid.UUID
    source_video_id: uuid.UUID

    # Composite score, validated to stay within [0.0, 1.0].
    score: float = Field(
        description="Composite highlight score",
        ge=0.0,
        le=1.0,
    )
    score_breakdown: HighlightScoreBreakdown

    # Duration may not be negative.
    duration_secs: float = Field(
        description="Duration of the key moment in seconds",
        ge=0.0,
    )

    # Declared as a plain str: the values named in the description are not
    # enforced by this schema.
    status: str = Field(
        description="One of: candidate, approved, rejected",
    )
    created_at: datetime


class HighlightBatchResult(BaseModel):
    """Outcome summary for one video's highlight scoring batch run."""

    video_id: uuid.UUID

    # Both counters are validated as non-negative integers.
    candidates_created: int = Field(
        description="Number of new candidates inserted",
        ge=0,
    )
    candidates_updated: int = Field(
        description="Number of existing candidates re-scored",
        ge=0,
    )

    # Validated to stay within [0.0, 1.0], like the per-candidate score.
    top_score: float = Field(
        description="Highest score in this batch",
        ge=0.0,
        le=1.0,
    )