feat: Added HighlightCandidate ORM model, Alembic migration 019, and Pydantic schemas

- "backend/models.py" - "alembic/versions/019_add_highlight_candidates.py" - "backend/pipeline/highlight_schemas.py" GSD-Task: S04/T01
2026-04-04 05:30:36 +00:00 · 2026-04-04 05:30:36 +00:00 · c374165865
commit c374165865
parent 90c24d8bf9
3 changed files with 139 additions and 0 deletions
--- a/alembic/versions/019_add_highlight_candidates.py
+++ b/alembic/versions/019_add_highlight_candidates.py
@ -0,0 +1,45 @@
+"""Add highlight_candidates table for highlight detection scoring.
+
+Revision ID: 019_add_highlight_candidates
+Revises: 018_add_impersonation_log
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import UUID
+
+
+revision = "019_add_highlight_candidates"
+down_revision = "018_add_impersonation_log"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the ``highlight_status`` enum type and the ``highlight_candidates`` table.
+
+    Also creates three indexes on the new table: ``source_video_id``,
+    ``score`` (descending), and ``status``.
+    """
+    # Create the enum type explicitly and idempotently, and mark the column
+    # type with create_type=False. op.create_table() fires the table's
+    # before_create event with checkfirst=False; without create_type=False
+    # that event would emit a second CREATE TYPE and fail because the type
+    # already exists.
+    highlight_status = sa.dialects.postgresql.ENUM(
+        "candidate",
+        "approved",
+        "rejected",
+        name="highlight_status",
+        create_type=False,
+    )
+    highlight_status.create(op.get_bind(), checkfirst=True)
+
+    op.create_table(
+        "highlight_candidates",
+        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
+        sa.Column("key_moment_id", UUID(as_uuid=True), sa.ForeignKey("key_moments.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("source_video_id", UUID(as_uuid=True), sa.ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("score", sa.Float, nullable=False),
+        sa.Column("score_breakdown", sa.dialects.postgresql.JSONB, nullable=True),
+        sa.Column("duration_secs", sa.Float, nullable=False),
+        sa.Column("status", highlight_status, nullable=False, server_default="candidate"),
+        sa.Column("created_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
+        sa.Column("updated_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
+        # One candidate per key moment. The constraint is named explicitly so
+        # the database object matches the name declared in the ORM model's
+        # __table_args__ (backend/models.py).
+        sa.UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
+    )
+    op.create_index("ix_highlight_candidates_source_video_id", "highlight_candidates", ["source_video_id"])
+    op.create_index("ix_highlight_candidates_score_desc", "highlight_candidates", [sa.text("score DESC")])
+    op.create_index("ix_highlight_candidates_status", "highlight_candidates", ["status"])
+
+
+def downgrade() -> None:
+    """Undo ``upgrade()``: drop the indexes, the table, then the enum type."""
+    # Indexes are removed in the reverse of the order upgrade() created them.
+    for index_name in (
+        "ix_highlight_candidates_status",
+        "ix_highlight_candidates_score_desc",
+        "ix_highlight_candidates_source_video_id",
+    ):
+        op.drop_index(index_name)
+
+    op.drop_table("highlight_candidates")
+
+    # Drop the enum type only after the table that uses it is gone.
+    status_enum = sa.Enum(name="highlight_status")
+    status_enum.drop(op.get_bind(), checkfirst=True)
--- a/backend/models.py
+++ b/backend/models.py
@ -80,6 +80,13 @@ class UserRole(str, enum.Enum):
    admin = "admin"


+class HighlightStatus(str, enum.Enum):
+    """Triage status for highlight candidates.
+
+    Mixes in ``str``, so every member is also a string equal to its value.
+    """
+    # Values match the labels of the ``highlight_status`` enum type created
+    # by migration 019; "candidate" is the default used by HighlightCandidate.
+    candidate = "candidate"
+    approved = "approved"
+    rejected = "rejected"
+
+
 # ── Helpers ──────────────────────────────────────────────────────────────────

 def _uuid_pk() -> Mapped[uuid.UUID]:
@ -674,3 +681,39 @@ class ImpersonationLog(Base):
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
+
+
+# ── Highlight Detection ─────────────────────────────────────────────────────
+
+class HighlightCandidate(Base):
+    """Scored candidate for highlight detection, one per KeyMoment.
+
+    The one-per-KeyMoment rule is enforced by the named unique constraint
+    ``uq_highlight_candidate_moment`` on ``key_moment_id``.
+    """
+    __tablename__ = "highlight_candidates"
+    __table_args__ = (
+        # Single declaration of uniqueness for key_moment_id. The column is
+        # deliberately not also flagged unique=True, which would declare a
+        # second, redundant unique constraint on the same column.
+        UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
+    )
+
+    id: Mapped[uuid.UUID] = _uuid_pk()
+    # Both foreign keys use ondelete="CASCADE".
+    key_moment_id: Mapped[uuid.UUID] = mapped_column(
+        ForeignKey("key_moments.id", ondelete="CASCADE"), nullable=False,
+    )
+    source_video_id: Mapped[uuid.UUID] = mapped_column(
+        ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True,
+    )
+    score: Mapped[float] = mapped_column(Float, nullable=False)
+    # Optional JSONB payload; may be NULL.
+    score_breakdown: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    duration_secs: Mapped[float] = mapped_column(Float, nullable=False)
+    # Defaults to "candidate" both in Python (default) and in the database
+    # (server_default).
+    status: Mapped[HighlightStatus] = mapped_column(
+        Enum(HighlightStatus, name="highlight_status", create_constraint=True),
+        default=HighlightStatus.candidate,
+        server_default="candidate",
+    )
+    created_at: Mapped[datetime] = mapped_column(
+        default=_now, server_default=func.now()
+    )
+    # onupdate=_now refreshes this value when the row is updated through the ORM.
+    updated_at: Mapped[datetime] = mapped_column(
+        default=_now, server_default=func.now(), onupdate=_now
+    )
+
+    # relationships
+    key_moment: Mapped[KeyMoment] = sa_relationship()
+    source_video: Mapped[SourceVideo] = sa_relationship()
--- a/backend/pipeline/highlight_schemas.py
+++ b/backend/pipeline/highlight_schemas.py
@ -0,0 +1,51 @@
+"""Pydantic schemas for highlight detection pipeline.
+
+Covers scoring breakdown, candidate responses, and batch result summaries.
+"""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class HighlightScoreBreakdown(BaseModel):
+    """Per-dimension score breakdown for a highlight candidate.
+
+    Each field is a float in [0, 1] representing the normalized score
+    for that scoring dimension. The range is enforced by validation
+    (``ge=0.0, le=1.0``), matching the bounds on the composite score.
+    """
+
+    duration_score: float = Field(ge=0.0, le=1.0, description="Score based on moment duration (sweet-spot curve)")
+    content_density_score: float = Field(ge=0.0, le=1.0, description="Score based on transcript richness / word density")
+    technique_relevance_score: float = Field(ge=0.0, le=1.0, description="Score based on content_type and plugin mentions")
+    position_score: float = Field(ge=0.0, le=1.0, description="Score based on temporal position within the video")
+    uniqueness_score: float = Field(ge=0.0, le=1.0, description="Score based on title/topic distinctness among siblings")
+    engagement_proxy_score: float = Field(ge=0.0, le=1.0, description="Proxy engagement signal from summary quality/length")
+    plugin_diversity_score: float = Field(ge=0.0, le=1.0, description="Score based on breadth of plugins/tools mentioned")
+
+
+class HighlightCandidateResponse(BaseModel):
+    """API response schema for a single highlight candidate.
+
+    ``from_attributes`` is enabled, so instances can be validated directly
+    from objects exposing these attributes (e.g. ORM rows).
+    """
+
+    id: uuid.UUID
+    key_moment_id: uuid.UUID
+    source_video_id: uuid.UUID
+    score: float = Field(ge=0.0, le=1.0, description="Composite highlight score")
+    # Optional because the backing ORM column is nullable; a required field
+    # would fail validation for candidates stored without a breakdown.
+    score_breakdown: HighlightScoreBreakdown | None = None
+    duration_secs: float = Field(ge=0.0, description="Duration of the key moment in seconds")
+    status: str = Field(description="One of: candidate, approved, rejected")
+    created_at: datetime
+
+    model_config = {"from_attributes": True}
+
+
+class HighlightBatchResult(BaseModel):
+    """Outcome summary for one highlight scoring batch over a single video.
+
+    Counts must be non-negative and ``top_score`` must lie in [0, 1].
+    """
+
+    video_id: uuid.UUID
+    candidates_created: int = Field(
+        description="Number of new candidates inserted",
+        ge=0,
+    )
+    candidates_updated: int = Field(
+        description="Number of existing candidates re-scored",
+        ge=0,
+    )
+    top_score: float = Field(
+        description="Highest score in this batch",
+        ge=0.0,
+        le=1.0,
+    )