"""SQLAlchemy ORM models for the Chrysopedia knowledge base. Seven entities matching chrysopedia-spec.md §6.1: Creator, SourceVideo, TranscriptSegment, KeyMoment, TechniquePage, RelatedTechniqueLink, Tag """ from __future__ import annotations import enum import uuid from datetime import datetime, timezone from sqlalchemy import ( Enum, Float, ForeignKey, Integer, String, Text, UniqueConstraint, func, ) from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import relationship as sa_relationship from database import Base # ── Enums ──────────────────────────────────────────────────────────────────── class ContentType(str, enum.Enum): """Source video content type.""" tutorial = "tutorial" livestream = "livestream" breakdown = "breakdown" short_form = "short_form" class ProcessingStatus(str, enum.Enum): """Pipeline processing status for a source video.""" pending = "pending" transcribed = "transcribed" extracted = "extracted" reviewed = "reviewed" published = "published" class KeyMomentContentType(str, enum.Enum): """Content classification for a key moment.""" technique = "technique" settings = "settings" reasoning = "reasoning" workflow = "workflow" class ReviewStatus(str, enum.Enum): """Human review status for key moments.""" pending = "pending" approved = "approved" edited = "edited" rejected = "rejected" class SourceQuality(str, enum.Enum): """Derived source quality for technique pages.""" structured = "structured" mixed = "mixed" unstructured = "unstructured" class PageReviewStatus(str, enum.Enum): """Review lifecycle for technique pages.""" draft = "draft" reviewed = "reviewed" published = "published" class RelationshipType(str, enum.Enum): """Types of links between technique pages.""" same_technique_other_creator = "same_technique_other_creator" same_creator_adjacent = "same_creator_adjacent" general_cross_reference = "general_cross_reference" # ── Helpers ────────────────────────────────────────────────────────────────── def _uuid_pk() -> Mapped[uuid.UUID]: return mapped_column( UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, server_default=func.gen_random_uuid(), ) def _now() -> datetime: """Return current UTC time as a naive datetime (no tzinfo). PostgreSQL TIMESTAMP WITHOUT TIME ZONE columns require naive datetimes. asyncpg rejects timezone-aware datetimes for such columns. """ return datetime.now(timezone.utc).replace(tzinfo=None) # ── Models ─────────────────────────────────────────────────────────────────── class Creator(Base): __tablename__ = "creators" id: Mapped[uuid.UUID] = _uuid_pk() name: Mapped[str] = mapped_column(String(255), nullable=False) slug: Mapped[str] = mapped_column(String(255), unique=True, nullable=False) genres: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) folder_name: Mapped[str] = mapped_column(String(255), nullable=False) view_count: Mapped[int] = mapped_column(Integer, default=0, server_default="0") created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships videos: Mapped[list[SourceVideo]] = sa_relationship(back_populates="creator") technique_pages: Mapped[list[TechniquePage]] = sa_relationship(back_populates="creator") class SourceVideo(Base): __tablename__ = "source_videos" id: Mapped[uuid.UUID] = _uuid_pk() creator_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("creators.id", ondelete="CASCADE"), nullable=False ) filename: Mapped[str] = mapped_column(String(500), nullable=False) file_path: Mapped[str] = mapped_column(String(1000), nullable=False) duration_seconds: Mapped[int] = mapped_column(Integer, nullable=True) content_type: Mapped[ContentType] = mapped_column( Enum(ContentType, name="content_type", create_constraint=True), nullable=False, ) transcript_path: Mapped[str | None] = mapped_column(String(1000), nullable=True) content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True) processing_status: Mapped[ProcessingStatus] = mapped_column( Enum(ProcessingStatus, name="processing_status", create_constraint=True), default=ProcessingStatus.pending, server_default="pending", ) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships creator: Mapped[Creator] = sa_relationship(back_populates="videos") segments: Mapped[list[TranscriptSegment]] = sa_relationship(back_populates="source_video") key_moments: Mapped[list[KeyMoment]] = sa_relationship(back_populates="source_video") class TranscriptSegment(Base): __tablename__ = "transcript_segments" id: Mapped[uuid.UUID] = _uuid_pk() source_video_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False ) start_time: Mapped[float] = mapped_column(Float, nullable=False) end_time: Mapped[float] = mapped_column(Float, nullable=False) text: Mapped[str] = mapped_column(Text, nullable=False) segment_index: Mapped[int] = mapped_column(Integer, nullable=False) topic_label: Mapped[str | None] = mapped_column(String(255), nullable=True) # relationships source_video: Mapped[SourceVideo] = sa_relationship(back_populates="segments") class KeyMoment(Base): __tablename__ = "key_moments" id: Mapped[uuid.UUID] = _uuid_pk() source_video_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False ) technique_page_id: Mapped[uuid.UUID | None] = mapped_column( ForeignKey("technique_pages.id", ondelete="SET NULL"), nullable=True ) title: Mapped[str] = mapped_column(String(500), nullable=False) summary: Mapped[str] = mapped_column(Text, nullable=False) start_time: Mapped[float] = mapped_column(Float, nullable=False) end_time: Mapped[float] = mapped_column(Float, nullable=False) content_type: Mapped[KeyMomentContentType] = mapped_column( Enum(KeyMomentContentType, name="key_moment_content_type", create_constraint=True), nullable=False, ) plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) review_status: Mapped[ReviewStatus] = mapped_column( Enum(ReviewStatus, name="review_status", create_constraint=True), default=ReviewStatus.pending, server_default="pending", ) raw_transcript: Mapped[str | None] = mapped_column(Text, nullable=True) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships source_video: Mapped[SourceVideo] = sa_relationship(back_populates="key_moments") technique_page: Mapped[TechniquePage | None] = sa_relationship( back_populates="key_moments", foreign_keys=[technique_page_id] ) class TechniquePage(Base): __tablename__ = "technique_pages" id: Mapped[uuid.UUID] = _uuid_pk() creator_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("creators.id", ondelete="CASCADE"), nullable=False ) title: Mapped[str] = mapped_column(String(500), nullable=False) slug: Mapped[str] = mapped_column(String(500), unique=True, nullable=False) topic_category: Mapped[str] = mapped_column(String(255), nullable=False) topic_tags: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) summary: Mapped[str | None] = mapped_column(Text, nullable=True) body_sections: Mapped[dict | None] = mapped_column(JSONB, nullable=True) signal_chains: Mapped[list | None] = mapped_column(JSONB, nullable=True) plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) source_quality: Mapped[SourceQuality | None] = mapped_column( Enum(SourceQuality, name="source_quality", create_constraint=True), nullable=True, ) view_count: Mapped[int] = mapped_column(Integer, default=0, server_default="0") review_status: Mapped[PageReviewStatus] = mapped_column( Enum(PageReviewStatus, name="page_review_status", create_constraint=True), default=PageReviewStatus.draft, server_default="draft", ) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships creator: Mapped[Creator] = sa_relationship(back_populates="technique_pages") key_moments: Mapped[list[KeyMoment]] = sa_relationship( back_populates="technique_page", foreign_keys=[KeyMoment.technique_page_id] ) versions: Mapped[list[TechniquePageVersion]] = sa_relationship( back_populates="technique_page", order_by="TechniquePageVersion.version_number" ) outgoing_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship( foreign_keys="RelatedTechniqueLink.source_page_id", back_populates="source_page" ) incoming_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship( foreign_keys="RelatedTechniqueLink.target_page_id", back_populates="target_page" ) class RelatedTechniqueLink(Base): __tablename__ = "related_technique_links" __table_args__ = ( UniqueConstraint("source_page_id", "target_page_id", "relationship", name="uq_technique_link"), ) id: Mapped[uuid.UUID] = _uuid_pk() source_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) target_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) relationship: Mapped[RelationshipType] = mapped_column( Enum(RelationshipType, name="relationship_type", create_constraint=True), nullable=False, ) # relationships source_page: Mapped[TechniquePage] = sa_relationship( foreign_keys=[source_page_id], back_populates="outgoing_links" ) target_page: Mapped[TechniquePage] = sa_relationship( foreign_keys=[target_page_id], back_populates="incoming_links" ) class TechniquePageVersion(Base): """Snapshot of a TechniquePage before a pipeline re-synthesis overwrites it.""" __tablename__ = "technique_page_versions" id: Mapped[uuid.UUID] = _uuid_pk() technique_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) version_number: Mapped[int] = mapped_column(Integer, nullable=False) content_snapshot: Mapped[dict] = mapped_column(JSONB, nullable=False) pipeline_metadata: Mapped[dict | None] = mapped_column(JSONB, nullable=True) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) # relationships technique_page: Mapped[TechniquePage] = sa_relationship( back_populates="versions" ) class Tag(Base): __tablename__ = "tags" id: Mapped[uuid.UUID] = _uuid_pk() name: Mapped[str] = mapped_column(String(255), unique=True, nullable=False) category: Mapped[str] = mapped_column(String(255), nullable=False) aliases: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) # ── Content Report Enums ───────────────────────────────────────────────────── class ReportType(str, enum.Enum): """Classification of user-submitted content reports.""" inaccurate = "inaccurate" missing_info = "missing_info" wrong_attribution = "wrong_attribution" formatting = "formatting" other = "other" class ReportStatus(str, enum.Enum): """Triage status for content reports.""" open = "open" acknowledged = "acknowledged" resolved = "resolved" dismissed = "dismissed" # ── Content Report ─────────────────────────────────────────────────────────── class ContentReport(Base): """User-submitted report about a content issue. Generic: content_type + content_id can reference any entity (technique_page, key_moment, creator, or general). """ __tablename__ = "content_reports" id: Mapped[uuid.UUID] = _uuid_pk() content_type: Mapped[str] = mapped_column( String(50), nullable=False, doc="Entity type: technique_page, key_moment, creator, general" ) content_id: Mapped[uuid.UUID | None] = mapped_column( UUID(as_uuid=True), nullable=True, doc="FK to the reported entity (null for general reports)" ) content_title: Mapped[str | None] = mapped_column( String(500), nullable=True, doc="Snapshot of entity title at report time" ) report_type: Mapped[ReportType] = mapped_column( Enum(ReportType, name="report_type", create_constraint=True), nullable=False, ) description: Mapped[str] = mapped_column(Text, nullable=False) status: Mapped[ReportStatus] = mapped_column( Enum(ReportStatus, name="report_status", create_constraint=True), default=ReportStatus.open, server_default="open", ) admin_notes: Mapped[str | None] = mapped_column(Text, nullable=True) page_url: Mapped[str | None] = mapped_column( String(1000), nullable=True, doc="URL the user was on when reporting" ) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) resolved_at: Mapped[datetime | None] = mapped_column(nullable=True) # ── Pipeline Event ─────────────────────────────────────────────────────────── class PipelineEvent(Base): """Structured log entry for pipeline execution. Captures per-stage start/complete/error/llm_call events with token usage and optional response payloads for debugging. """ __tablename__ = "pipeline_events" id: Mapped[uuid.UUID] = _uuid_pk() video_id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), nullable=False, index=True, ) stage: Mapped[str] = mapped_column( String(50), nullable=False, doc="stage2_segmentation, stage3_extraction, etc." ) event_type: Mapped[str] = mapped_column( String(30), nullable=False, doc="start, complete, error, llm_call" ) prompt_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) completion_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) model: Mapped[str | None] = mapped_column(String(100), nullable=True) duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True) payload: Mapped[dict | None] = mapped_column( JSONB, nullable=True, doc="LLM response content, error details, stage metadata" ) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) # Debug mode — full LLM I/O capture columns system_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True) user_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True) response_text: Mapped[str | None] = mapped_column(Text, nullable=True)