feat: Added HighlightCandidate ORM model, Alembic migration 019, and Pydantic highlight schemas

- "backend/models.py" - "alembic/versions/019_add_highlight_candidates.py" - "backend/pipeline/highlight_schemas.py" GSD-Task: S04/T01
2026-04-04 05:30:36 +00:00 · 2026-04-04 05:30:36 +00:00 · c374165865
commit c374165865
parent 90c24d8bf9
3 changed files with 139 additions and 0 deletions
--- a/alembic/versions/019_add_highlight_candidates.py
+++ b/alembic/versions/019_add_highlight_candidates.py
@ -0,0 +1,45 @@
 """Add highlight_candidates table for highlight detection scoring.
 Revision ID: 019_add_highlight_candidates
 Revises: 018_add_impersonation_log
 """
 from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects.postgresql import UUID
 # Revision identifiers read by Alembic; they mirror the "Revision ID" /
 # "Revises" lines in the module docstring.
 revision = "019_add_highlight_candidates"
 down_revision = "018_add_impersonation_log"
 branch_labels = None
 depends_on = None
 def upgrade() -> None:
    """Create the ``highlight_status`` enum, the ``highlight_candidates`` table, and its indexes.

    The enum type is created explicitly (and idempotently) before the table.
    The type object handed to the column is declared with ``create_type=False``
    so that ``op.create_table`` does not emit a second, unchecked
    ``CREATE TYPE`` for the same name, which PostgreSQL rejects with
    "type already exists".
    """
    # PostgreSQL-native enum; CREATE TYPE is only issued by the explicit
    # create() call below, never implicitly by the table creation.
    highlight_status = sa.dialects.postgresql.ENUM(
        "candidate",
        "approved",
        "rejected",
        name="highlight_status",
        create_type=False,
    )
    highlight_status.create(op.get_bind(), checkfirst=True)
    op.create_table(
        "highlight_candidates",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("key_moment_id", UUID(as_uuid=True), sa.ForeignKey("key_moments.id", ondelete="CASCADE"), nullable=False),
        sa.Column("source_video_id", UUID(as_uuid=True), sa.ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False),
        sa.Column("score", sa.Float, nullable=False),
        sa.Column("score_breakdown", sa.dialects.postgresql.JSONB, nullable=True),
        sa.Column("duration_secs", sa.Float, nullable=False),
        sa.Column("status", highlight_status, nullable=False, server_default="candidate"),
        sa.Column("created_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
        sa.Column("updated_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
        # One candidate per key moment. The constraint carries the same
        # explicit name the ORM model declares, so the database schema and
        # the model metadata agree.
        sa.UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
    )
    op.create_index("ix_highlight_candidates_source_video_id", "highlight_candidates", ["source_video_id"])
    # Descending index on score.
    op.create_index("ix_highlight_candidates_score_desc", "highlight_candidates", [sa.text("score DESC")])
    op.create_index("ix_highlight_candidates_status", "highlight_candidates", ["status"])
 def downgrade() -> None:
    """Remove the three indexes, the ``highlight_candidates`` table, and the enum type."""
    # Indexes are dropped first, in the reverse of the order ``upgrade`` created them.
    for index_name in (
        "ix_highlight_candidates_status",
        "ix_highlight_candidates_score_desc",
        "ix_highlight_candidates_source_video_id",
    ):
        op.drop_index(index_name)
    op.drop_table("highlight_candidates")
    # The enum type is dropped last, once the table using it is gone;
    # checkfirst makes the drop a no-op if the type is already absent.
    status_enum = sa.Enum(name="highlight_status")
    status_enum.drop(op.get_bind(), checkfirst=True)
--- a/backend/models.py
+++ b/backend/models.py
@ -80,6 +80,13 @@ class UserRole(str, enum.Enum):
    admin = "admin"
 class HighlightStatus(str, enum.Enum):
    """Review state of a highlight candidate.

    Members are also ``str`` instances, so each compares equal to its
    plain string value.
    """

    # Member order is significant: it is the iteration order of the enum.
    candidate = "candidate"
    approved = "approved"
    rejected = "rejected"
 # ── Helpers ──────────────────────────────────────────────────────────────────
 def _uuid_pk() -> Mapped[uuid.UUID]:
@ -674,3 +681,39 @@ class ImpersonationLog(Base):
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
 # ── Highlight Detection ─────────────────────────────────────────────────────
 class HighlightCandidate(Base):
    """Scored candidate for highlight detection, one per KeyMoment.

    Each row links a key moment and its source video to a composite
    ``score``, an optional JSONB ``score_breakdown``, the moment's
    ``duration_secs``, and a triage ``status`` that defaults to
    ``candidate``. Both foreign keys are declared with
    ``ON DELETE CASCADE``.
    """
    __tablename__ = "highlight_candidates"
    __table_args__ = (
        # Single, explicitly named declaration of the one-candidate-per-moment
        # rule. The column itself does not also set ``unique=True``, which
        # would add a second, redundant unique constraint on the same column.
        UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
    )
    id: Mapped[uuid.UUID] = _uuid_pk()
    key_moment_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("key_moments.id", ondelete="CASCADE"), nullable=False,
    )
    source_video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True,
    )
    score: Mapped[float] = mapped_column(Float, nullable=False)
    # Nullable: a candidate may be stored without a per-dimension breakdown.
    score_breakdown: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    duration_secs: Mapped[float] = mapped_column(Float, nullable=False)
    status: Mapped[HighlightStatus] = mapped_column(
        Enum(HighlightStatus, name="highlight_status", create_constraint=True),
        default=HighlightStatus.candidate,
        server_default="candidate",
    )
    # Timestamps have both a Python-side default (``_now``) and a
    # server-side default (``now()``); ``updated_at`` is also refreshed
    # through ``onupdate``.
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now(), onupdate=_now
    )
    # relationships
    key_moment: Mapped[KeyMoment] = sa_relationship()
    source_video: Mapped[SourceVideo] = sa_relationship()
--- a/backend/pipeline/highlight_schemas.py
+++ b/backend/pipeline/highlight_schemas.py
@ -0,0 +1,51 @@
 """Pydantic schemas for highlight detection pipeline.
 Covers scoring breakdown, candidate responses, and batch result summaries.
 """
 from __future__ import annotations
 import uuid
 from datetime import datetime
 from pydantic import BaseModel, Field
 class HighlightScoreBreakdown(BaseModel):
    """Breakdown of a highlight candidate's score into seven named dimensions.

    Every dimension is a required float. The values are intended to be
    normalized to [0, 1]; note that this schema does not itself enforce
    that range.
    """

    duration_score: float = Field(
        description="Score based on moment duration (sweet-spot curve)",
    )
    content_density_score: float = Field(
        description="Score based on transcript richness / word density",
    )
    technique_relevance_score: float = Field(
        description="Score based on content_type and plugin mentions",
    )
    position_score: float = Field(
        description="Score based on temporal position within the video",
    )
    uniqueness_score: float = Field(
        description="Score based on title/topic distinctness among siblings",
    )
    engagement_proxy_score: float = Field(
        description="Proxy engagement signal from summary quality/length",
    )
    plugin_diversity_score: float = Field(
        description="Score based on breadth of plugins/tools mentioned",
    )
 class HighlightCandidateResponse(BaseModel):
    """API response schema for a single highlight candidate.

    ``from_attributes`` is enabled, so instances can be validated directly
    from attribute-bearing objects such as ORM rows.
    """
    id: uuid.UUID
    key_moment_id: uuid.UUID
    source_video_id: uuid.UUID
    score: float = Field(ge=0.0, le=1.0, description="Composite highlight score")
    # Optional: the backing ``score_breakdown`` column is nullable, so a stored
    # candidate without a breakdown must still validate instead of raising.
    score_breakdown: HighlightScoreBreakdown | None = None
    duration_secs: float = Field(ge=0.0, description="Duration of the key moment in seconds")
    status: str = Field(description="One of: candidate, approved, rejected")
    created_at: datetime
    model_config = {"from_attributes": True}
 class HighlightBatchResult(BaseModel):
    """Outcome summary of one highlight scoring run over a single video.

    Both counters must be non-negative and ``top_score`` must lie in [0, 1].
    """

    video_id: uuid.UUID
    candidates_created: int = Field(
        ge=0,
        description="Number of new candidates inserted",
    )
    candidates_updated: int = Field(
        ge=0,
        description="Number of existing candidates re-scored",
    )
    top_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Highest score in this batch",
    )