feat: Added HighlightCandidate ORM model, Alembic migration 019, and Py…

- "backend/models.py"
- "alembic/versions/019_add_highlight_candidates.py"
- "backend/pipeline/highlight_schemas.py"

GSD-Task: S04/T01
This commit is contained in:
jlightner 2026-04-04 05:30:36 +00:00
parent 90c24d8bf9
commit c374165865
3 changed files with 139 additions and 0 deletions

View file

@ -0,0 +1,45 @@
"""Add highlight_candidates table for highlight detection scoring.
Revision ID: 019_add_highlight_candidates
Revises: 018_add_impersonation_log
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID
# Revision identifiers, used by Alembic to place this migration in the chain.
revision = "019_add_highlight_candidates"
# Parent revision: this migration applies on top of the impersonation-log one.
down_revision = "018_add_impersonation_log"
# No branch label and no cross-branch dependency for this revision.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the ``highlight_status`` enum and the ``highlight_candidates`` table.

    Steps, in order:

    1. Create the PostgreSQL enum type ``highlight_status`` (skipped if it
       already exists).
    2. Create ``highlight_candidates`` with cascading foreign keys to
       ``key_moments`` and ``source_videos`` and a named unique constraint
       on ``key_moment_id``.
    3. Create lookup indexes on ``source_video_id``, ``score DESC`` and
       ``status``.
    """
    bind = op.get_bind()

    # create_type=False: the type is created explicitly right below, so the
    # before-create hook fired by op.create_table() must not emit a second
    # CREATE TYPE (that one runs without checkfirst and fails with
    # "type already exists"). Calling .create() directly still emits the DDL.
    highlight_status = sa.dialects.postgresql.ENUM(
        "candidate",
        "approved",
        "rejected",
        name="highlight_status",
        create_type=False,
    )
    highlight_status.create(bind, checkfirst=True)

    op.create_table(
        "highlight_candidates",
        sa.Column(
            "id",
            UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("gen_random_uuid()"),
        ),
        sa.Column(
            "key_moment_id",
            UUID(as_uuid=True),
            sa.ForeignKey("key_moments.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "source_video_id",
            UUID(as_uuid=True),
            sa.ForeignKey("source_videos.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("score", sa.Float, nullable=False),
        sa.Column("score_breakdown", sa.dialects.postgresql.JSONB, nullable=True),
        sa.Column("duration_secs", sa.Float, nullable=False),
        sa.Column("status", highlight_status, nullable=False, server_default="candidate"),
        sa.Column("created_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
        sa.Column("updated_at", sa.DateTime, server_default=sa.func.now(), nullable=False),
        # Named to match the ORM model's UniqueConstraint so autogenerate
        # does not report drift between the model and the database.
        sa.UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
    )

    op.create_index(
        "ix_highlight_candidates_source_video_id",
        "highlight_candidates",
        ["source_video_id"],
    )
    # Descending index expressed as raw SQL text.
    op.create_index(
        "ix_highlight_candidates_score_desc",
        "highlight_candidates",
        [sa.text("score DESC")],
    )
    op.create_index(
        "ix_highlight_candidates_status",
        "highlight_candidates",
        ["status"],
    )
def downgrade() -> None:
    """Remove everything ``upgrade`` created, in reverse order.

    Indexes are dropped first, then the ``highlight_candidates`` table, and
    finally the ``highlight_status`` enum type (only if it exists).
    """
    indexes_newest_first = (
        "ix_highlight_candidates_status",
        "ix_highlight_candidates_score_desc",
        "ix_highlight_candidates_source_video_id",
    )
    for index_name in indexes_newest_first:
        op.drop_index(index_name)

    op.drop_table("highlight_candidates")

    # The enum type can only go once no column references it any more.
    status_enum = sa.Enum(name="highlight_status")
    status_enum.drop(op.get_bind(), checkfirst=True)

View file

@ -80,6 +80,13 @@ class UserRole(str, enum.Enum):
admin = "admin" admin = "admin"
class HighlightStatus(str, enum.Enum):
    """Triage state of a highlight candidate.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Initial state; the highlight_candidates.status column defaults to it.
    candidate = "candidate"
    approved = "approved"
    rejected = "rejected"
# ── Helpers ────────────────────────────────────────────────────────────────── # ── Helpers ──────────────────────────────────────────────────────────────────
def _uuid_pk() -> Mapped[uuid.UUID]: def _uuid_pk() -> Mapped[uuid.UUID]:
@ -674,3 +681,39 @@ class ImpersonationLog(Base):
created_at: Mapped[datetime] = mapped_column( created_at: Mapped[datetime] = mapped_column(
default=_now, server_default=func.now() default=_now, server_default=func.now()
) )
# ── Highlight Detection ─────────────────────────────────────────────────────
class HighlightCandidate(Base):
    """Scored candidate for highlight detection, one per KeyMoment.

    Each row stores a composite ``score``, an optional JSONB
    ``score_breakdown``, the moment's ``duration_secs`` and a triage
    ``status``. Rows are removed automatically when the referenced key
    moment or source video is deleted (``ON DELETE CASCADE``).
    """

    __tablename__ = "highlight_candidates"
    __table_args__ = (
        # Single source of truth for the one-candidate-per-moment rule.
        # The column itself no longer sets unique=True, which would add a
        # second, anonymous unique constraint on the same column.
        UniqueConstraint("key_moment_id", name="uq_highlight_candidate_moment"),
    )

    id: Mapped[uuid.UUID] = _uuid_pk()
    key_moment_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("key_moments.id", ondelete="CASCADE"),
        nullable=False,
    )
    source_video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    score: Mapped[float] = mapped_column(Float, nullable=False)
    # Nullable: a candidate may be stored without a per-dimension breakdown.
    score_breakdown: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    duration_secs: Mapped[float] = mapped_column(Float, nullable=False)
    status: Mapped[HighlightStatus] = mapped_column(
        Enum(HighlightStatus, name="highlight_status", create_constraint=True),
        default=HighlightStatus.candidate,
        server_default="candidate",
    )
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    # onupdate refreshes the timestamp on ORM-issued UPDATE statements.
    updated_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now(), onupdate=_now
    )

    # relationships
    key_moment: Mapped[KeyMoment] = sa_relationship()
    source_video: Mapped[SourceVideo] = sa_relationship()

View file

@ -0,0 +1,51 @@
"""Pydantic schemas for highlight detection pipeline.
Covers scoring breakdown, candidate responses, and batch result summaries.
"""
from __future__ import annotations
import uuid
from datetime import datetime
from pydantic import BaseModel, Field
class HighlightScoreBreakdown(BaseModel):
    """Per-dimension score breakdown for a highlight candidate.

    Every field is required and is documented as a normalized float in
    [0, 1] for its scoring dimension. The range is a convention of the
    scorer; this schema does not itself validate it.
    """

    duration_score: float = Field(
        description="Score based on moment duration (sweet-spot curve)",
    )
    content_density_score: float = Field(
        description="Score based on transcript richness / word density",
    )
    technique_relevance_score: float = Field(
        description="Score based on content_type and plugin mentions",
    )
    position_score: float = Field(
        description="Score based on temporal position within the video",
    )
    uniqueness_score: float = Field(
        description="Score based on title/topic distinctness among siblings",
    )
    engagement_proxy_score: float = Field(
        description="Proxy engagement signal from summary quality/length",
    )
    plugin_diversity_score: float = Field(
        description="Score based on breadth of plugins/tools mentioned",
    )
class HighlightCandidateResponse(BaseModel):
    """API response schema for a single highlight candidate.

    Can be populated straight from an ORM row because ``from_attributes``
    is enabled in ``model_config``.
    """

    id: uuid.UUID
    key_moment_id: uuid.UUID
    source_video_id: uuid.UUID
    score: float = Field(ge=0.0, le=1.0, description="Composite highlight score")
    # Optional because the backing highlight_candidates.score_breakdown
    # column is nullable; a row stored without a breakdown must still
    # serialize instead of failing validation.
    score_breakdown: HighlightScoreBreakdown | None = None
    duration_secs: float = Field(ge=0.0, description="Duration of the key moment in seconds")
    status: str = Field(description="One of: candidate, approved, rejected")
    created_at: datetime

    # Allow validation from attribute-bearing objects, not only dicts.
    model_config = {"from_attributes": True}
class HighlightBatchResult(BaseModel):
    """Summary of a highlight scoring batch run for one video.

    Reports how many candidates were inserted versus re-scored, plus the
    best score observed in the batch. Counts must be non-negative and
    ``top_score`` must lie in [0, 1].
    """

    video_id: uuid.UUID
    candidates_created: int = Field(
        ge=0,
        description="Number of new candidates inserted",
    )
    candidates_updated: int = Field(
        ge=0,
        description="Number of existing candidates re-scored",
    )
    top_score: float = Field(
        ge=0.0,
        le=1.0,
        description="Highest score in this batch",
    )