diff --git a/alembic/versions/019_add_highlight_candidates.py b/alembic/versions/019_add_highlight_candidates.py
new file mode 100644
index 0000000..b4f1da5
--- /dev/null
+++ b/alembic/versions/019_add_highlight_candidates.py
@@ -0,0 +1,51 @@
+"""Add highlight_candidates table for highlight detection scoring.
+
+Revision ID: 019_add_highlight_candidates
+Revises: 018_add_impersonation_log
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import ENUM, JSONB, UUID
+
+
+revision = "019_add_highlight_candidates"
+down_revision = "018_add_impersonation_log"
+branch_labels = None
+depends_on = None
+
+# create_type=False: the type is created and dropped explicitly below, so
+# op.create_table() must not emit a second CREATE TYPE for the same name.
+highlight_status = ENUM("candidate", "approved", "rejected", name="highlight_status", create_type=False)
+
+
+def upgrade() -> None:
+    """Create the highlight_status enum type, the highlight_candidates table, and its indexes."""
+    highlight_status.create(op.get_bind(), checkfirst=True)
+
+    op.create_table(
+        "highlight_candidates",
+        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
+        sa.Column("key_moment_id", UUID(as_uuid=True), sa.ForeignKey("key_moments.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("source_video_id", UUID(as_uuid=True), sa.ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("score", sa.Float, nullable=False),
+        sa.Column("score_breakdown", JSONB, nullable=True),
+        sa.Column("duration_secs", sa.Float, nullable=False),
+        sa.Column("status", highlight_status, nullable=False, server_default="candidate"),
+        sa.Column("created_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
+        sa.Column("updated_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
+        # Named to match the UniqueConstraint declared on the HighlightCandidate model.
+        sa.UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
+    )
+    op.create_index("ix_highlight_candidates_source_video_id", "highlight_candidates", ["source_video_id"])
+    op.create_index("ix_highlight_candidates_score_desc", "highlight_candidates", [sa.text("score DESC")])
+    op.create_index("ix_highlight_candidates_status", "highlight_candidates", ["status"])
+
+
+def downgrade() -> None:
+    """Drop the indexes, then the table, and finally the enum type the table depended on."""
+    op.drop_index("ix_highlight_candidates_status", table_name="highlight_candidates")
+    op.drop_index("ix_highlight_candidates_score_desc", table_name="highlight_candidates")
+    op.drop_index("ix_highlight_candidates_source_video_id", table_name="highlight_candidates")
+    op.drop_table("highlight_candidates")
+    highlight_status.drop(op.get_bind(), checkfirst=True)
diff --git a/backend/models.py b/backend/models.py
index c6e7099..66a0e43 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -80,6 +80,13 @@ class UserRole(str, enum.Enum):
     admin = "admin"
 
 
+class HighlightStatus(str, enum.Enum):
+    """Triage status for highlight candidates."""
+    candidate = "candidate"
+    approved = "approved"
+    rejected = "rejected"
+
+
 # ── Helpers ──────────────────────────────────────────────────────────────────
 
 def _uuid_pk() -> Mapped[uuid.UUID]:
@@ -674,3 +681,42 @@ class ImpersonationLog(Base):
     created_at: Mapped[datetime] = mapped_column(
         default=_now, server_default=func.now()
     )
+
+
+# ── Highlight Detection ─────────────────────────────────────────────────────
+
+class HighlightCandidate(Base):
+    """Scored candidate for highlight detection, one per KeyMoment."""
+    __tablename__ = "highlight_candidates"
+    __table_args__ = (
+        # Single source of truth for the one-candidate-per-moment rule; the
+        # column itself does not repeat unique=True, which would emit a second,
+        # unnamed constraint that the migration never creates.
+        UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
+    )
+
+    id: Mapped[uuid.UUID] = _uuid_pk()
+    key_moment_id: Mapped[uuid.UUID] = mapped_column(
+        ForeignKey("key_moments.id", ondelete="CASCADE"), nullable=False,
+    )
+    source_video_id: Mapped[uuid.UUID] = mapped_column(
+        ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True,
+    )
+    score: Mapped[float] = mapped_column(Float, nullable=False)
+    score_breakdown: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+    duration_secs: Mapped[float] = mapped_column(Float, nullable=False)
+    status: Mapped[HighlightStatus] = mapped_column(
+        Enum(HighlightStatus, name="highlight_status", create_constraint=True),
+        default=HighlightStatus.candidate,
+        server_default="candidate",
+    )
+    created_at: Mapped[datetime] = mapped_column(
+        default=_now, server_default=func.now()
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        default=_now, server_default=func.now(), onupdate=_now
+    )
+
+    # relationships
+    key_moment: Mapped[KeyMoment] = sa_relationship()
+    source_video: Mapped[SourceVideo] = sa_relationship()
diff --git a/backend/pipeline/highlight_schemas.py b/backend/pipeline/highlight_schemas.py
new file mode 100644
index 0000000..983ede6
--- /dev/null
+++ b/backend/pipeline/highlight_schemas.py
@@ -0,0 +1,52 @@
+"""Pydantic schemas for highlight detection pipeline.
+
+Covers scoring breakdown, candidate responses, and batch result summaries.
+"""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class HighlightScoreBreakdown(BaseModel):
+    """Per-dimension score breakdown for a highlight candidate.
+
+    Each field is a float in [0, 1] representing the normalized score
+    for that scoring dimension.
+    """
+
+    duration_score: float = Field(description="Score based on moment duration (sweet-spot curve)")
+    content_density_score: float = Field(description="Score based on transcript richness / word density")
+    technique_relevance_score: float = Field(description="Score based on content_type and plugin mentions")
+    position_score: float = Field(description="Score based on temporal position within the video")
+    uniqueness_score: float = Field(description="Score based on title/topic distinctness among siblings")
+    engagement_proxy_score: float = Field(description="Proxy engagement signal from summary quality/length")
+    plugin_diversity_score: float = Field(description="Score based on breadth of plugins/tools mentioned")
+
+
+class HighlightCandidateResponse(BaseModel):
+    """API response schema for a single highlight candidate."""
+
+    id: uuid.UUID
+    key_moment_id: uuid.UUID
+    source_video_id: uuid.UUID
+    score: float = Field(ge=0.0, le=1.0, description="Composite highlight score")
+    # The score_breakdown column is nullable, so ORM rows may carry None here.
+    score_breakdown: HighlightScoreBreakdown | None = None
+    duration_secs: float = Field(ge=0.0, description="Duration of the key moment in seconds")
+    status: str = Field(description="One of: candidate, approved, rejected")
+    created_at: datetime
+
+    model_config = {"from_attributes": True}
+
+
+class HighlightBatchResult(BaseModel):
+    """Summary of a highlight scoring batch run for one video."""
+
+    video_id: uuid.UUID
+    candidates_created: int = Field(ge=0, description="Number of new candidates inserted")
+    candidates_updated: int = Field(ge=0, description="Number of existing candidates re-scored")
+    top_score: float = Field(ge=0.0, le=1.0, description="Highest score in this batch")