"""SQLAlchemy ORM models for the Chrysopedia knowledge base. Seven entities matching chrysopedia-spec.md §6.1: Creator, SourceVideo, TranscriptSegment, KeyMoment, TechniquePage, RelatedTechniqueLink, Tag """ from __future__ import annotations import enum import uuid from datetime import datetime, timezone from sqlalchemy import ( Enum, Float, ForeignKey, Integer, String, Text, UniqueConstraint, func, ) from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import relationship as sa_relationship from database import Base # ── Enums ──────────────────────────────────────────────────────────────────── class ContentType(str, enum.Enum): """Source video content type.""" tutorial = "tutorial" livestream = "livestream" breakdown = "breakdown" short_form = "short_form" class ProcessingStatus(str, enum.Enum): """Pipeline processing status for a source video. User-facing lifecycle: not_started → queued → processing → complete Error branch: processing → error (retrigger resets to queued) """ not_started = "not_started" queued = "queued" processing = "processing" error = "error" complete = "complete" class KeyMomentContentType(str, enum.Enum): """Content classification for a key moment.""" technique = "technique" settings = "settings" reasoning = "reasoning" workflow = "workflow" class SourceQuality(str, enum.Enum): """Derived source quality for technique pages.""" structured = "structured" mixed = "mixed" unstructured = "unstructured" class RelationshipType(str, enum.Enum): """Types of links between technique pages.""" same_technique_other_creator = "same_technique_other_creator" same_creator_adjacent = "same_creator_adjacent" general_cross_reference = "general_cross_reference" # ── Helpers ────────────────────────────────────────────────────────────────── def _uuid_pk() -> Mapped[uuid.UUID]: return mapped_column( UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, server_default=func.gen_random_uuid(), ) def _now() -> datetime: """Return current UTC time as a naive datetime (no tzinfo). PostgreSQL TIMESTAMP WITHOUT TIME ZONE columns require naive datetimes. asyncpg rejects timezone-aware datetimes for such columns. """ return datetime.now(timezone.utc).replace(tzinfo=None) # ── Models ─────────────────────────────────────────────────────────────────── class Creator(Base): __tablename__ = "creators" id: Mapped[uuid.UUID] = _uuid_pk() name: Mapped[str] = mapped_column(String(255), nullable=False) slug: Mapped[str] = mapped_column(String(255), unique=True, nullable=False) genres: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) folder_name: Mapped[str] = mapped_column(String(255), nullable=False) avatar_url: Mapped[str | None] = mapped_column(String(1000), nullable=True) avatar_source: Mapped[str | None] = mapped_column(String(50), nullable=True) avatar_fetched_at: Mapped[datetime | None] = mapped_column(nullable=True) bio: Mapped[str | None] = mapped_column(Text, nullable=True) social_links: Mapped[dict | None] = mapped_column(JSONB, nullable=True) featured: Mapped[bool] = mapped_column(default=False, server_default="false") view_count: Mapped[int] = mapped_column(Integer, default=0, server_default="0") hidden: Mapped[bool] = mapped_column(default=False, server_default="false") created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships videos: Mapped[list[SourceVideo]] = sa_relationship(back_populates="creator") technique_pages: Mapped[list[TechniquePage]] = sa_relationship(back_populates="creator") class SourceVideo(Base): __tablename__ = "source_videos" id: Mapped[uuid.UUID] = _uuid_pk() creator_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("creators.id", ondelete="CASCADE"), nullable=False ) filename: Mapped[str] = mapped_column(String(500), nullable=False) file_path: Mapped[str] = mapped_column(String(1000), nullable=False) duration_seconds: Mapped[int] = mapped_column(Integer, nullable=True) content_type: Mapped[ContentType] = mapped_column( Enum(ContentType, name="content_type", create_constraint=True), nullable=False, ) transcript_path: Mapped[str | None] = mapped_column(String(1000), nullable=True) content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True) processing_status: Mapped[ProcessingStatus] = mapped_column( Enum(ProcessingStatus, name="processing_status", create_constraint=True), default=ProcessingStatus.not_started, server_default="not_started", ) classification_data: Mapped[list | None] = mapped_column(JSONB, nullable=True) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships creator: Mapped[Creator] = sa_relationship(back_populates="videos") segments: Mapped[list[TranscriptSegment]] = sa_relationship(back_populates="source_video") key_moments: Mapped[list[KeyMoment]] = sa_relationship(back_populates="source_video") class TranscriptSegment(Base): __tablename__ = "transcript_segments" id: Mapped[uuid.UUID] = _uuid_pk() source_video_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False ) start_time: Mapped[float] = mapped_column(Float, nullable=False) end_time: Mapped[float] = mapped_column(Float, nullable=False) text: Mapped[str] = mapped_column(Text, nullable=False) segment_index: Mapped[int] = mapped_column(Integer, nullable=False) topic_label: Mapped[str | None] = mapped_column(String(255), nullable=True) # relationships source_video: Mapped[SourceVideo] = sa_relationship(back_populates="segments") class KeyMoment(Base): __tablename__ = "key_moments" id: Mapped[uuid.UUID] = _uuid_pk() source_video_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False ) technique_page_id: Mapped[uuid.UUID | None] = mapped_column( ForeignKey("technique_pages.id", ondelete="SET NULL"), nullable=True ) title: Mapped[str] = mapped_column(String(500), nullable=False) summary: Mapped[str] = mapped_column(Text, nullable=False) start_time: Mapped[float] = mapped_column(Float, nullable=False) end_time: Mapped[float] = mapped_column(Float, nullable=False) content_type: Mapped[KeyMomentContentType] = mapped_column( Enum(KeyMomentContentType, name="key_moment_content_type", create_constraint=True), nullable=False, ) plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) raw_transcript: Mapped[str | None] = mapped_column(Text, nullable=True) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships source_video: Mapped[SourceVideo] = sa_relationship(back_populates="key_moments") technique_page: Mapped[TechniquePage | None] = sa_relationship( back_populates="key_moments", foreign_keys=[technique_page_id] ) class TechniquePage(Base): __tablename__ = "technique_pages" id: Mapped[uuid.UUID] = _uuid_pk() creator_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("creators.id", ondelete="CASCADE"), nullable=False ) title: Mapped[str] = mapped_column(String(500), nullable=False) slug: Mapped[str] = mapped_column(String(500), unique=True, nullable=False) topic_category: Mapped[str] = mapped_column(String(255), nullable=False) topic_tags: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) summary: Mapped[str | None] = mapped_column(Text, nullable=True) body_sections: Mapped[dict | None] = mapped_column(JSONB, nullable=True) body_sections_format: Mapped[str] = mapped_column( String(20), nullable=False, default="v1", server_default="v1" ) signal_chains: Mapped[list | None] = mapped_column(JSONB, nullable=True) plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) source_quality: Mapped[SourceQuality | None] = mapped_column( Enum(SourceQuality, name="source_quality", create_constraint=True), nullable=True, ) view_count: Mapped[int] = mapped_column(Integer, default=0, server_default="0") created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) updated_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), onupdate=_now ) # relationships creator: Mapped[Creator] = sa_relationship(back_populates="technique_pages") key_moments: Mapped[list[KeyMoment]] = sa_relationship( back_populates="technique_page", foreign_keys=[KeyMoment.technique_page_id] ) versions: Mapped[list[TechniquePageVersion]] = sa_relationship( back_populates="technique_page", order_by="TechniquePageVersion.version_number" ) outgoing_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship( foreign_keys="RelatedTechniqueLink.source_page_id", back_populates="source_page" ) incoming_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship( foreign_keys="RelatedTechniqueLink.target_page_id", back_populates="target_page" ) source_video_links: Mapped[list[TechniquePageVideo]] = sa_relationship( back_populates="technique_page" ) class RelatedTechniqueLink(Base): __tablename__ = "related_technique_links" __table_args__ = ( UniqueConstraint("source_page_id", "target_page_id", "relationship", name="uq_technique_link"), ) id: Mapped[uuid.UUID] = _uuid_pk() source_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) target_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) relationship: Mapped[RelationshipType] = mapped_column( Enum(RelationshipType, name="relationship_type", create_constraint=True), nullable=False, ) # relationships source_page: Mapped[TechniquePage] = sa_relationship( foreign_keys=[source_page_id], back_populates="outgoing_links" ) target_page: Mapped[TechniquePage] = sa_relationship( foreign_keys=[target_page_id], back_populates="incoming_links" ) class TechniquePageVersion(Base): """Snapshot of a TechniquePage before a pipeline re-synthesis overwrites it.""" __tablename__ = "technique_page_versions" id: Mapped[uuid.UUID] = _uuid_pk() technique_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) version_number: Mapped[int] = mapped_column(Integer, nullable=False) content_snapshot: Mapped[dict] = mapped_column(JSONB, nullable=False) pipeline_metadata: Mapped[dict | None] = mapped_column(JSONB, nullable=True) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) # relationships technique_page: Mapped[TechniquePage] = sa_relationship( back_populates="versions" ) class Tag(Base): __tablename__ = "tags" id: Mapped[uuid.UUID] = _uuid_pk() name: Mapped[str] = mapped_column(String(255), unique=True, nullable=False) category: Mapped[str] = mapped_column(String(255), nullable=False) aliases: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True) class TechniquePageVideo(Base): """Association linking a technique page to its contributing source videos.""" __tablename__ = "technique_page_videos" __table_args__ = ( UniqueConstraint("technique_page_id", "source_video_id", name="uq_page_video"), ) id: Mapped[uuid.UUID] = _uuid_pk() technique_page_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False ) source_video_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False ) added_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) # relationships technique_page: Mapped[TechniquePage] = sa_relationship( back_populates="source_video_links" ) source_video: Mapped[SourceVideo] = sa_relationship() # ── Content Report Enums ───────────────────────────────────────────────────── class ReportType(str, enum.Enum): """Classification of user-submitted content reports.""" inaccurate = "inaccurate" missing_info = "missing_info" wrong_attribution = "wrong_attribution" formatting = "formatting" other = "other" class ReportStatus(str, enum.Enum): """Triage status for content reports.""" open = "open" acknowledged = "acknowledged" resolved = "resolved" dismissed = "dismissed" # ── Content Report ─────────────────────────────────────────────────────────── class ContentReport(Base): """User-submitted report about a content issue. Generic: content_type + content_id can reference any entity (technique_page, key_moment, creator, or general). """ __tablename__ = "content_reports" id: Mapped[uuid.UUID] = _uuid_pk() content_type: Mapped[str] = mapped_column( String(50), nullable=False, doc="Entity type: technique_page, key_moment, creator, general" ) content_id: Mapped[uuid.UUID | None] = mapped_column( UUID(as_uuid=True), nullable=True, doc="FK to the reported entity (null for general reports)" ) content_title: Mapped[str | None] = mapped_column( String(500), nullable=True, doc="Snapshot of entity title at report time" ) report_type: Mapped[ReportType] = mapped_column( Enum(ReportType, name="report_type", create_constraint=True), nullable=False, ) description: Mapped[str] = mapped_column(Text, nullable=False) status: Mapped[ReportStatus] = mapped_column( Enum(ReportStatus, name="report_status", create_constraint=True), default=ReportStatus.open, server_default="open", ) admin_notes: Mapped[str | None] = mapped_column(Text, nullable=True) page_url: Mapped[str | None] = mapped_column( String(1000), nullable=True, doc="URL the user was on when reporting" ) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) resolved_at: Mapped[datetime | None] = mapped_column(nullable=True) # ── Pipeline Event ─────────────────────────────────────────────────────────── class SearchLog(Base): """Logged search query for analytics and popular searches.""" __tablename__ = "search_log" id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) query: Mapped[str] = mapped_column(String(500), nullable=False, index=True) scope: Mapped[str] = mapped_column(String(50), nullable=False) result_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now(), index=True ) class PipelineRunStatus(str, enum.Enum): """Status of a pipeline run.""" running = "running" complete = "complete" error = "error" cancelled = "cancelled" class PipelineRunTrigger(str, enum.Enum): """What initiated a pipeline run.""" manual = "manual" clean_reprocess = "clean_reprocess" auto_ingest = "auto_ingest" bulk = "bulk" stage_rerun = "stage_rerun" class PipelineRun(Base): """A single execution of the pipeline for a video. Each trigger/retrigger creates a new run. Events are scoped to a run via run_id, giving a clean audit trail per execution. """ __tablename__ = "pipeline_runs" id: Mapped[uuid.UUID] = _uuid_pk() video_id: Mapped[uuid.UUID] = mapped_column( ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True, ) run_number: Mapped[int] = mapped_column( Integer, nullable=False, doc="Auto-increment per video, 1-indexed" ) trigger: Mapped[PipelineRunTrigger] = mapped_column( Enum(PipelineRunTrigger, name="pipeline_run_trigger", create_constraint=True), nullable=False, ) status: Mapped[PipelineRunStatus] = mapped_column( Enum(PipelineRunStatus, name="pipeline_run_status", create_constraint=True), default=PipelineRunStatus.running, server_default="running", ) started_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) finished_at: Mapped[datetime | None] = mapped_column(nullable=True) error_stage: Mapped[str | None] = mapped_column(String(50), nullable=True) total_tokens: Mapped[int] = mapped_column(Integer, default=0, server_default="0") # relationships video: Mapped[SourceVideo] = sa_relationship() events: Mapped[list[PipelineEvent]] = sa_relationship( back_populates="run", foreign_keys="PipelineEvent.run_id" ) # ── Pipeline Event ─────────────────────────────────────────────────────────── class PipelineEvent(Base): """Structured log entry for pipeline execution. Captures per-stage start/complete/error/llm_call events with token usage and optional response payloads for debugging. """ __tablename__ = "pipeline_events" id: Mapped[uuid.UUID] = _uuid_pk() video_id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), nullable=False, index=True, ) run_id: Mapped[uuid.UUID | None] = mapped_column( ForeignKey("pipeline_runs.id", ondelete="SET NULL"), nullable=True, index=True, ) stage: Mapped[str] = mapped_column( String(50), nullable=False, doc="stage2_segmentation, stage3_extraction, etc." ) event_type: Mapped[str] = mapped_column( String(30), nullable=False, doc="start, complete, error, llm_call" ) prompt_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) completion_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) model: Mapped[str | None] = mapped_column(String(100), nullable=True) duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True) payload: Mapped[dict | None] = mapped_column( JSONB, nullable=True, doc="LLM response content, error details, stage metadata" ) created_at: Mapped[datetime] = mapped_column( default=_now, server_default=func.now() ) # Debug mode — full LLM I/O capture columns system_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True) user_prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True) response_text: Mapped[str | None] = mapped_column(Text, nullable=True) # relationships run: Mapped[PipelineRun | None] = sa_relationship( back_populates="events", foreign_keys=[run_id] )