- "backend/models.py" - "alembic/versions/002_technique_page_versions.py" - "backend/pipeline/stages.py" GSD-Task: S04/T01
321 lines
12 KiB
Python
321 lines
12 KiB
Python
"""SQLAlchemy ORM models for the Chrysopedia knowledge base.
|
|
|
|
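Seven entities matching chrysopedia-spec.md §6.1:
Creator, SourceVideo, TranscriptSegment, KeyMoment,
TechniquePage, RelatedTechniqueLink, Tag

In addition to those seven, this module defines TechniquePageVersion,
which stores snapshots of a TechniquePage.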
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import enum
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
|
|
from sqlalchemy import (
|
|
Enum,
|
|
Float,
|
|
ForeignKey,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
UniqueConstraint,
|
|
func,
|
|
)
|
|
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
|
|
from sqlalchemy.orm import Mapped, mapped_column
|
|
from sqlalchemy.orm import relationship as sa_relationship
|
|
|
|
from database import Base
|
|
|
|
|
|
# ── Enums ────────────────────────────────────────────────────────────────────
|
|
|
|
class ContentType(str, enum.Enum):
    """Kinds of content a source video can be classified as.

    The ``str`` mixin makes each member compare equal to its plain string
    value, and each member's value is identical to its name.
    """

    tutorial = "tutorial"
    livestream = "livestream"
    breakdown = "breakdown"
    short_form = "short_form"
|
|
|
|
|
|
class ProcessingStatus(str, enum.Enum):
    """Processing state recorded for a source video by the pipeline.

    Members are string-valued (value == name).
    NOTE(review): the declaration order looks like the order of pipeline
    stages, but nothing in this module enforces transitions — confirm against
    the pipeline code.
    """

    pending = "pending"
    transcribed = "transcribed"
    extracted = "extracted"
    reviewed = "reviewed"
    published = "published"
|
|
|
|
|
|
class KeyMomentContentType(str, enum.Enum):
    """Classification applied to the content of a key moment.

    Members are string-valued (value == name) and, through the ``str`` mixin,
    compare equal to their raw string.
    """

    technique = "technique"
    settings = "settings"
    reasoning = "reasoning"
    workflow = "workflow"
|
|
|
|
|
|
class ReviewStatus(str, enum.Enum):
    """Outcome of human review for a key moment.

    Members are string-valued (value == name).
    """

    pending = "pending"
    approved = "approved"
    edited = "edited"
    rejected = "rejected"
|
|
|
|
|
|
class SourceQuality(str, enum.Enum):
    """Source-quality label stored on technique pages.

    Members are string-valued (value == name).
    """

    structured = "structured"
    mixed = "mixed"
    unstructured = "unstructured"
|
|
|
|
|
|
class PageReviewStatus(str, enum.Enum):
    """Review lifecycle state of a technique page.

    Members are string-valued (value == name).
    """

    draft = "draft"
    reviewed = "reviewed"
    published = "published"
|
|
|
|
|
|
class RelationshipType(str, enum.Enum):
    """Kinds of link that can connect two technique pages.

    Members are string-valued (value == name).
    """

    same_technique_other_creator = "same_technique_other_creator"
    same_creator_adjacent = "same_creator_adjacent"
    general_cross_reference = "general_cross_reference"
|
|
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
def _uuid_pk() -> Mapped[uuid.UUID]:
    """Build a UUID primary-key column definition.

    Every call produces a new ``mapped_column`` so each model gets its own
    column object. The key has two defaults: ``uuid.uuid4`` on the Python
    side and ``gen_random_uuid()`` as the server-side default.
    """
    pk_type = UUID(as_uuid=True)  # values are handled as uuid.UUID objects
    return mapped_column(
        pk_type,
        server_default=func.gen_random_uuid(),
        default=uuid.uuid4,
        primary_key=True,
    )
|
|
|
|
|
|
def _now() -> datetime:
|
|
"""Return current UTC time as a naive datetime (no tzinfo).
|
|
|
|
PostgreSQL TIMESTAMP WITHOUT TIME ZONE columns require naive datetimes.
|
|
asyncpg rejects timezone-aware datetimes for such columns.
|
|
"""
|
|
return datetime.now(timezone.utc).replace(tzinfo=None)
|
|
|
|
|
|
# ── Models ───────────────────────────────────────────────────────────────────
|
|
|
|
class Creator(Base):
    """ORM model mapped to the ``creators`` table.

    A creator is the parent of source videos and technique pages.
    """

    __tablename__ = "creators"

    id: Mapped[uuid.UUID] = _uuid_pk()
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    # The slug is unique across all creators.
    slug: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
    genres: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    folder_name: Mapped[str] = mapped_column(String(255), nullable=False)
    # Starts at zero whether the row is inserted via the ORM or raw SQL.
    view_count: Mapped[int] = mapped_column(Integer, server_default="0", default=0)
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), default=_now)
    # Refreshed with _now on every ORM-issued UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        default=_now,
        onupdate=_now,
    )

    # relationships
    videos: Mapped[list[SourceVideo]] = sa_relationship(back_populates="creator")
    technique_pages: Mapped[list[TechniquePage]] = sa_relationship(
        back_populates="creator"
    )
|
|
|
|
|
|
class SourceVideo(Base):
    """ORM model mapped to the ``source_videos`` table.

    Each row references one creator through ``creator_id`` and is the parent
    of transcript segments and key moments.
    """

    __tablename__ = "source_videos"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Foreign key declared with ON DELETE CASCADE towards creators.id.
    creator_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("creators.id", ondelete="CASCADE"), nullable=False
    )
    filename: Mapped[str] = mapped_column(String(500), nullable=False)
    file_path: Mapped[str] = mapped_column(String(1000), nullable=False)
    # The column is nullable, so the annotation admits None too. The explicit
    # nullable=True keeps the DDL exactly as before; only the typing changes.
    duration_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True)
    content_type: Mapped[ContentType] = mapped_column(
        Enum(ContentType, name="content_type", create_constraint=True),
        nullable=False,
    )
    transcript_path: Mapped[str | None] = mapped_column(String(1000), nullable=True)
    # New rows start as "pending" both from the ORM and at the database level.
    processing_status: Mapped[ProcessingStatus] = mapped_column(
        Enum(ProcessingStatus, name="processing_status", create_constraint=True),
        default=ProcessingStatus.pending,
        server_default="pending",
    )
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    # Refreshed with _now on every ORM-issued UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now(), onupdate=_now
    )

    # relationships
    creator: Mapped[Creator] = sa_relationship(back_populates="videos")
    segments: Mapped[list[TranscriptSegment]] = sa_relationship(
        back_populates="source_video"
    )
    key_moments: Mapped[list[KeyMoment]] = sa_relationship(
        back_populates="source_video"
    )
|
|
|
|
|
|
class TranscriptSegment(Base):
    """ORM model mapped to the ``transcript_segments`` table.

    A segment stores a piece of transcript text with its start/end times and
    its index, and belongs to exactly one source video.
    """

    __tablename__ = "transcript_segments"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Foreign key declared with ON DELETE CASCADE towards source_videos.id.
    source_video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"),
        nullable=False,
    )
    # NOTE(review): start/end are presumably seconds into the video — confirm.
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    segment_index: Mapped[int] = mapped_column(Integer, nullable=False)
    topic_label: Mapped[str | None] = mapped_column(String(255), nullable=True)

    # relationships
    source_video: Mapped[SourceVideo] = sa_relationship(
        back_populates="segments"
    )
|
|
|
|
|
|
class KeyMoment(Base):
    """ORM model mapped to the ``key_moments`` table.

    A key moment always belongs to a source video and may optionally be
    attached to a technique page.
    """

    __tablename__ = "key_moments"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Foreign key declared with ON DELETE CASCADE towards source_videos.id.
    source_video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Optional link; the foreign key is declared with ON DELETE SET NULL.
    technique_page_id: Mapped[uuid.UUID | None] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="SET NULL"),
        nullable=True,
    )
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    summary: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    content_type: Mapped[KeyMomentContentType] = mapped_column(
        Enum(
            KeyMomentContentType,
            name="key_moment_content_type",
            create_constraint=True,
        ),
        nullable=False,
    )
    plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    # New rows start as "pending" both from the ORM and at the database level.
    review_status: Mapped[ReviewStatus] = mapped_column(
        Enum(ReviewStatus, name="review_status", create_constraint=True),
        server_default="pending",
        default=ReviewStatus.pending,
    )
    raw_transcript: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), default=_now)
    # Refreshed with _now on every ORM-issued UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        default=_now,
        onupdate=_now,
    )

    # relationships
    source_video: Mapped[SourceVideo] = sa_relationship(
        back_populates="key_moments"
    )
    technique_page: Mapped[TechniquePage | None] = sa_relationship(
        foreign_keys=[technique_page_id],
        back_populates="key_moments",
    )
|
|
|
|
|
|
class TechniquePage(Base):
    """ORM model mapped to the ``technique_pages`` table.

    A technique page belongs to one creator. It groups key moments, keeps a
    list of versions, and can link to other technique pages in both
    directions.
    """

    __tablename__ = "technique_pages"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Foreign key declared with ON DELETE CASCADE towards creators.id.
    creator_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("creators.id", ondelete="CASCADE"),
        nullable=False,
    )
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    # The slug is unique across all technique pages.
    slug: Mapped[str] = mapped_column(String(500), nullable=False, unique=True)
    topic_category: Mapped[str] = mapped_column(String(255), nullable=False)
    topic_tags: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    summary: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Page content is stored as JSONB documents.
    body_sections: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    signal_chains: Mapped[list | None] = mapped_column(JSONB, nullable=True)
    plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    source_quality: Mapped[SourceQuality | None] = mapped_column(
        Enum(SourceQuality, name="source_quality", create_constraint=True),
        nullable=True,
    )
    # Starts at zero whether the row is inserted via the ORM or raw SQL.
    view_count: Mapped[int] = mapped_column(Integer, server_default="0", default=0)
    # New pages start as "draft" both from the ORM and at the database level.
    review_status: Mapped[PageReviewStatus] = mapped_column(
        Enum(PageReviewStatus, name="page_review_status", create_constraint=True),
        server_default="draft",
        default=PageReviewStatus.draft,
    )
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), default=_now)
    # Refreshed with _now on every ORM-issued UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        default=_now,
        onupdate=_now,
    )

    # relationships
    creator: Mapped[Creator] = sa_relationship(back_populates="technique_pages")
    key_moments: Mapped[list[KeyMoment]] = sa_relationship(
        foreign_keys=[KeyMoment.technique_page_id],
        back_populates="technique_page",
    )
    # Versions are loaded ordered by their version number.
    versions: Mapped[list[TechniquePageVersion]] = sa_relationship(
        order_by="TechniquePageVersion.version_number",
        back_populates="technique_page",
    )
    # Links where this page is the source.
    outgoing_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship(
        back_populates="source_page",
        foreign_keys="RelatedTechniqueLink.source_page_id",
    )
    # Links where this page is the target.
    incoming_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship(
        back_populates="target_page",
        foreign_keys="RelatedTechniqueLink.target_page_id",
    )
|
|
|
|
|
|
class RelatedTechniqueLink(Base):
    """ORM model mapped to the ``related_technique_links`` table.

    Each row is a typed, directed link from one technique page to another.
    The same (source, target, relationship) triple cannot be stored twice.
    """

    __tablename__ = "related_technique_links"
    __table_args__ = (
        UniqueConstraint(
            "source_page_id",
            "target_page_id",
            "relationship",
            name="uq_technique_link",
        ),
    )

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Both ends are foreign keys to technique_pages.id with ON DELETE CASCADE.
    source_page_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="CASCADE"),
        nullable=False,
    )
    target_page_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="CASCADE"),
        nullable=False,
    )
    # This attribute is itself named "relationship"; the ORM relationship
    # factory is used below under the name sa_relationship.
    relationship: Mapped[RelationshipType] = mapped_column(
        Enum(RelationshipType, name="relationship_type", create_constraint=True),
        nullable=False,
    )

    # relationships
    # Two foreign keys point at the same table, so each side names its own.
    source_page: Mapped[TechniquePage] = sa_relationship(
        back_populates="outgoing_links",
        foreign_keys=[source_page_id],
    )
    target_page: Mapped[TechniquePage] = sa_relationship(
        back_populates="incoming_links",
        foreign_keys=[target_page_id],
    )
|
|
|
|
|
|
class TechniquePageVersion(Base):
    """Stored snapshot of a TechniquePage, taken before a pipeline re-synthesis overwrites the page.

    Mapped to the ``technique_page_versions`` table. The snapshot is a JSONB
    document, optionally accompanied by JSONB pipeline metadata.
    """

    __tablename__ = "technique_page_versions"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Foreign key declared with ON DELETE CASCADE towards technique_pages.id.
    technique_page_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="CASCADE"),
        nullable=False,
    )
    # NOTE(review): no uniqueness constraint on (technique_page_id,
    # version_number) is declared in this class — confirm whether the
    # migration or the pipeline guarantees it.
    version_number: Mapped[int] = mapped_column(Integer, nullable=False)
    content_snapshot: Mapped[dict] = mapped_column(JSONB, nullable=False)
    pipeline_metadata: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), default=_now)

    # relationships
    technique_page: Mapped[TechniquePage] = sa_relationship(back_populates="versions")
|
|
|
|
|
|
class Tag(Base):
    """ORM model mapped to the ``tags`` table.

    A tag has a unique name, a category, and an optional list of aliases.
    """

    __tablename__ = "tags"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # Tag names are unique.
    name: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
    category: Mapped[str] = mapped_column(String(255), nullable=False)
    aliases: Mapped[list[str] | None] = mapped_column(
        ARRAY(String),
        nullable=True,
    )
|