fix: Created SQLAlchemy models for all 7 entities, Alembic async migrat…

- "backend/models.py"
- "backend/database.py"
- "alembic/versions/001_initial.py"
- "alembic/env.py"
- "alembic.ini"
- "alembic/script.py.mako"
- "docker-compose.yml"
- ".gsd/KNOWLEDGE.md"

GSD-Task: S01/T02
This commit is contained in:
jlightner 2026-03-29 21:48:36 +00:00
parent c404270f49
commit b2bb23930c
7 changed files with 621 additions and 1 deletions

37
alembic.ini Normal file
View file

@ -0,0 +1,37 @@
# Chrysopedia — Alembic configuration
[alembic]
# Directory that holds env.py, script.py.mako and the versions/ folder.
script_location = alembic
# Default connection URL. alembic/env.py replaces it with the DATABASE_URL
# environment variable when that variable is set.
sqlalchemy.url = postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia
# Logging setup; alembic/env.py passes this file to logging.config.fileConfig.
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
# No handler is listed for this logger; only its level and qualname are set.
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
# Same arrangement as above, at INFO level.
[logger_alembic]
level = INFO
handlers =
qualname = alembic
# The single handler: writes to stderr using the "generic" formatter.
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

69
alembic/env.py Normal file
View file

@ -0,0 +1,69 @@
"""Alembic env.py — async migration runner for Chrysopedia."""
import asyncio
import os
import sys
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import async_engine_from_config
# Ensure the backend package is importable: database.py and models.py live in
# ../backend and are imported below as top-level modules.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "backend"))

from database import Base  # noqa: E402
import models  # noqa: E402, F401 — registers all tables on Base.metadata

config = context.config

# Set up Python logging from the ini file, when Alembic was started with one.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Handed to context.configure() by both the offline and the online runner.
target_metadata = Base.metadata

# Allow DATABASE_URL env var to override alembic.ini.
# Option values pass through ConfigParser interpolation, so a literal "%"
# (for example a URL-encoded password such as "p%40ss") has to be written as
# "%%"; otherwise storing the option raises an interpolation error. Reading
# the option back yields the original single "%".
url_override = os.getenv("DATABASE_URL")
if url_override:
    config.set_main_option("sqlalchemy.url", url_override.replace("%", "%%"))
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Only the configured URL is handed to the Alembic context; this function
    creates no engine and opens no connection. Bound values are rendered
    inline (``literal_binds=True``).
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()
def do_run_migrations(connection):
    """Bind the Alembic context to *connection* and run the migrations.

    Synchronous on purpose: ``run_async_migrations`` invokes it through
    ``connection.run_sync``.
    """
    context.configure(target_metadata=target_metadata, connection=connection)

    with context.begin_transaction():
        context.run_migrations()
async def run_async_migrations() -> None:
    """Run migrations in 'online' mode through an async engine.

    The engine is built from the ``sqlalchemy.``-prefixed options of the main
    ini section, uses ``NullPool``, and is disposed once the migrations have
    run.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = async_engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    async with engine.connect() as conn:
        # Alembic's migration API is synchronous; run_sync bridges into it.
        await conn.run_sync(do_run_migrations)

    await engine.dispose()
def run_migrations_online() -> None:
    """Synchronous entry point: run the async migration coroutine to completion."""
    migration_coro = run_async_migrations()
    asyncio.run(migration_coro)
# Runs when Alembic executes this module: pick the runner for the current mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

25
alembic/script.py.mako Normal file
View file

@ -0,0 +1,25 @@
## Template for new revision files. Lines starting with "##" are Mako
## comments and do not appear in the rendered migration.
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
## Each body falls back to "pass" when there are no operations to render.
def upgrade() -> None:
    ${upgrades if upgrades else "pass"}
def downgrade() -> None:
    ${downgrades if downgrades else "pass"}

View file

@ -0,0 +1,171 @@
"""initial schema — 7 core entities
Revision ID: 001_initial
Revises:
Create Date: 2026-03-29
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
# revision identifiers, used by Alembic.
revision: str = "001_initial"
# None: this revision has no parent.
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the seven core tables, their indexes and the enum types they use.

    Creation order follows the foreign keys: creators, source_videos,
    transcript_segments, technique_pages, key_moments,
    related_technique_links, tags.
    """
    # NOTE(review): every primary key defaults to gen_random_uuid(); this
    # function does not enable any extension, so it presumably relies on a
    # server where that function is built in — confirm the target version.
    # ── Enum types ───────────────────────────────────────────────────────
    # Declared once here and attached to the columns below; this function
    # issues no separate statement for them.
    content_type = sa.Enum(
        "tutorial", "livestream", "breakdown", "short_form",
        name="content_type",
    )
    processing_status = sa.Enum(
        "pending", "transcribed", "extracted", "reviewed", "published",
        name="processing_status",
    )
    key_moment_content_type = sa.Enum(
        "technique", "settings", "reasoning", "workflow",
        name="key_moment_content_type",
    )
    review_status = sa.Enum(
        "pending", "approved", "edited", "rejected",
        name="review_status",
    )
    source_quality = sa.Enum(
        "structured", "mixed", "unstructured",
        name="source_quality",
    )
    page_review_status = sa.Enum(
        "draft", "reviewed", "published",
        name="page_review_status",
    )
    relationship_type = sa.Enum(
        "same_technique_other_creator", "same_creator_adjacent", "general_cross_reference",
        name="relationship_type",
    )
    # ── creators ─────────────────────────────────────────────────────────
    op.create_table(
        "creators",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("slug", sa.String(255), nullable=False, unique=True),
        sa.Column("genres", ARRAY(sa.String), nullable=True),
        sa.Column("folder_name", sa.String(255), nullable=False),
        sa.Column("view_count", sa.Integer, nullable=False, server_default="0"),
        # Timestamps here and below use sa.DateTime() with no timezone flag.
        sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
    )
    # ── source_videos ────────────────────────────────────────────────────
    op.create_table(
        "source_videos",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("creator_id", UUID(as_uuid=True), sa.ForeignKey("creators.id", ondelete="CASCADE"), nullable=False),
        sa.Column("filename", sa.String(500), nullable=False),
        sa.Column("file_path", sa.String(1000), nullable=False),
        sa.Column("duration_seconds", sa.Integer, nullable=True),
        sa.Column("content_type", content_type, nullable=False),
        sa.Column("transcript_path", sa.String(1000), nullable=True),
        sa.Column("processing_status", processing_status, nullable=False, server_default="pending"),
        sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
    )
    op.create_index("ix_source_videos_creator_id", "source_videos", ["creator_id"])
    # ── transcript_segments ──────────────────────────────────────────────
    op.create_table(
        "transcript_segments",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("source_video_id", UUID(as_uuid=True), sa.ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False),
        sa.Column("start_time", sa.Float, nullable=False),
        sa.Column("end_time", sa.Float, nullable=False),
        sa.Column("text", sa.Text, nullable=False),
        sa.Column("segment_index", sa.Integer, nullable=False),
        sa.Column("topic_label", sa.String(255), nullable=True),
    )
    # The index name omits the "source_" prefix of the column it covers.
    op.create_index("ix_transcript_segments_video_id", "transcript_segments", ["source_video_id"])
    # ── technique_pages (must come before key_moments due to FK) ─────────
    op.create_table(
        "technique_pages",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("creator_id", UUID(as_uuid=True), sa.ForeignKey("creators.id", ondelete="CASCADE"), nullable=False),
        sa.Column("title", sa.String(500), nullable=False),
        sa.Column("slug", sa.String(500), nullable=False, unique=True),
        sa.Column("topic_category", sa.String(255), nullable=False),
        sa.Column("topic_tags", ARRAY(sa.String), nullable=True),
        sa.Column("summary", sa.Text, nullable=True),
        sa.Column("body_sections", JSONB, nullable=True),
        sa.Column("signal_chains", JSONB, nullable=True),
        sa.Column("plugins", ARRAY(sa.String), nullable=True),
        sa.Column("source_quality", source_quality, nullable=True),
        sa.Column("view_count", sa.Integer, nullable=False, server_default="0"),
        sa.Column("review_status", page_review_status, nullable=False, server_default="draft"),
        sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
    )
    op.create_index("ix_technique_pages_creator_id", "technique_pages", ["creator_id"])
    op.create_index("ix_technique_pages_topic_category", "technique_pages", ["topic_category"])
    # ── key_moments ──────────────────────────────────────────────────────
    op.create_table(
        "key_moments",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("source_video_id", UUID(as_uuid=True), sa.ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False),
        # SET NULL rather than CASCADE: the column is nullable and the row
        # is kept when its technique page is deleted.
        sa.Column("technique_page_id", UUID(as_uuid=True), sa.ForeignKey("technique_pages.id", ondelete="SET NULL"), nullable=True),
        sa.Column("title", sa.String(500), nullable=False),
        sa.Column("summary", sa.Text, nullable=False),
        sa.Column("start_time", sa.Float, nullable=False),
        sa.Column("end_time", sa.Float, nullable=False),
        sa.Column("content_type", key_moment_content_type, nullable=False),
        sa.Column("plugins", ARRAY(sa.String), nullable=True),
        sa.Column("review_status", review_status, nullable=False, server_default="pending"),
        sa.Column("raw_transcript", sa.Text, nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
    )
    op.create_index("ix_key_moments_source_video_id", "key_moments", ["source_video_id"])
    op.create_index("ix_key_moments_technique_page_id", "key_moments", ["technique_page_id"])
    # ── related_technique_links ──────────────────────────────────────────
    op.create_table(
        "related_technique_links",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("source_page_id", UUID(as_uuid=True), sa.ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False),
        sa.Column("target_page_id", UUID(as_uuid=True), sa.ForeignKey("technique_pages.id", ondelete="CASCADE"), nullable=False),
        sa.Column("relationship", relationship_type, nullable=False),
        # At most one link of a given type per (source, target) pair.
        sa.UniqueConstraint("source_page_id", "target_page_id", "relationship", name="uq_technique_link"),
    )
    # ── tags ─────────────────────────────────────────────────────────────
    op.create_table(
        "tags",
        sa.Column("id", UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
        sa.Column("name", sa.String(255), nullable=False, unique=True),
        sa.Column("category", sa.String(255), nullable=False),
        sa.Column("aliases", ARRAY(sa.String), nullable=True),
    )
    op.create_index("ix_tags_category", "tags", ["category"])
def downgrade() -> None:
    """Drop the seven tables, then the enum types their columns used."""
    # Dependents first, so no table is dropped while another still references it.
    for table_name in (
        "tags",
        "related_technique_links",
        "key_moments",
        "technique_pages",
        "transcript_segments",
        "source_videos",
        "creators",
    ):
        op.drop_table(table_name)

    # Drop enum types
    bind = op.get_bind()
    for enum_name in (
        "relationship_type",
        "page_review_status",
        "source_quality",
        "review_status",
        "key_moment_content_type",
        "processing_status",
        "content_type",
    ):
        sa.Enum(name=enum_name).drop(bind, checkfirst=True)

26
backend/database.py Normal file
View file

@ -0,0 +1,26 @@
"""Database engine, session factory, and declarative base for Chrysopedia."""
import os
from collections.abc import AsyncIterator

from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase
# Connection URL: the DATABASE_URL environment variable when set, otherwise
# the localhost default below.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia",
)

# Module-level engine and session factory, created at import time.
engine = create_async_engine(DATABASE_URL, pool_pre_ping=True, echo=False)
async_session = async_sessionmaker(
    bind=engine,
    class_=AsyncSession,
    expire_on_commit=False,
)
class Base(DeclarativeBase):
    """Common declarative base; ORM models subclass it so their tables register on ``Base.metadata``."""
async def get_session() -> AsyncIterator[AsyncSession]:
    """FastAPI dependency that yields an async DB session.

    This is an async generator, so the return annotation is an iterator of
    sessions rather than a session. The session comes from the module-level
    ``async_session`` factory; the ``async with`` block is exited when the
    generator is resumed or closed after the ``yield``.

    Yields:
        One ``AsyncSession``.
    """
    async with async_session() as session:
        yield session

292
backend/models.py Normal file
View file

@ -0,0 +1,292 @@
"""SQLAlchemy ORM models for the Chrysopedia knowledge base.
Seven entities matching chrysopedia-spec.md §6.1:
Creator, SourceVideo, TranscriptSegment, KeyMoment,
TechniquePage, RelatedTechniqueLink, Tag
"""
from __future__ import annotations
import enum
import uuid
from datetime import datetime, timezone
from sqlalchemy import (
Enum,
Float,
ForeignKey,
Integer,
String,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.orm import relationship as sa_relationship
from database import Base
# ── Enums ────────────────────────────────────────────────────────────────────
class ContentType(str, enum.Enum):
    """Kind of source video.

    Members are ``str`` instances whose value equals their name.
    """

    tutorial = "tutorial"
    livestream = "livestream"
    breakdown = "breakdown"
    short_form = "short_form"
class ProcessingStatus(str, enum.Enum):
    """Where a source video stands in the processing pipeline.

    Members are ``str`` instances whose value equals their name.
    """

    pending = "pending"
    transcribed = "transcribed"
    extracted = "extracted"
    reviewed = "reviewed"
    published = "published"
class KeyMomentContentType(str, enum.Enum):
    """Classification of what a key moment contains.

    Members are ``str`` instances whose value equals their name.
    """

    technique = "technique"
    settings = "settings"
    reasoning = "reasoning"
    workflow = "workflow"
class ReviewStatus(str, enum.Enum):
    """Outcome of human review for a key moment.

    Members are ``str`` instances whose value equals their name.
    """

    pending = "pending"
    approved = "approved"
    edited = "edited"
    rejected = "rejected"
class SourceQuality(str, enum.Enum):
    """Derived quality rating of the source material behind a technique page.

    Members are ``str`` instances whose value equals their name.
    """

    structured = "structured"
    mixed = "mixed"
    unstructured = "unstructured"
class PageReviewStatus(str, enum.Enum):
    """Review lifecycle state of a technique page.

    Members are ``str`` instances whose value equals their name.
    """

    draft = "draft"
    reviewed = "reviewed"
    published = "published"
class RelationshipType(str, enum.Enum):
    """Kind of link that connects two technique pages.

    Members are ``str`` instances whose value equals their name.
    """

    same_technique_other_creator = "same_technique_other_creator"
    same_creator_adjacent = "same_creator_adjacent"
    general_cross_reference = "general_cross_reference"
# ── Helpers ──────────────────────────────────────────────────────────────────
def _uuid_pk() -> Mapped[uuid.UUID]:
    """Build the UUID primary-key column used as ``id`` on every model.

    The column has a Python-side default (``uuid.uuid4``) and a server-side
    default (``gen_random_uuid()``).
    """
    pk_column = mapped_column(
        UUID(as_uuid=True),
        server_default=func.gen_random_uuid(),
        default=uuid.uuid4,
        primary_key=True,
    )
    return pk_column
def _now() -> datetime:
return datetime.now(timezone.utc)
# ── Models ───────────────────────────────────────────────────────────────────
class Creator(Base):
    """ORM model for the ``creators`` table.

    A creator is the parent of source videos and technique pages.
    """

    __tablename__ = "creators"

    id: Mapped[uuid.UUID] = _uuid_pk()
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    slug: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
    genres: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    folder_name: Mapped[str] = mapped_column(String(255), nullable=False)
    view_count: Mapped[int] = mapped_column(Integer, server_default="0", default=0)
    # Both timestamps get a Python-side and a server-side default;
    # updated_at is additionally refreshed by _now on UPDATE.
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), default=_now)
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        default=_now,
        onupdate=_now,
    )

    # relationships
    videos: Mapped[list[SourceVideo]] = sa_relationship(back_populates="creator")
    technique_pages: Mapped[list[TechniquePage]] = sa_relationship(back_populates="creator")
class SourceVideo(Base):
    """ORM model for the ``source_videos`` table.

    Each video belongs to one creator and is the parent of transcript
    segments and key moments.
    """

    __tablename__ = "source_videos"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # index=True produces ix_source_videos_creator_id, the index the initial
    # migration creates, so the model metadata and the schema agree.
    creator_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("creators.id", ondelete="CASCADE"), nullable=False, index=True
    )
    filename: Mapped[str] = mapped_column(String(500), nullable=False)
    file_path: Mapped[str] = mapped_column(String(1000), nullable=False)
    # The column is nullable, so the Python type must admit None.
    duration_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True)
    content_type: Mapped[ContentType] = mapped_column(
        Enum(ContentType, name="content_type", create_constraint=True),
        nullable=False,
    )
    transcript_path: Mapped[str | None] = mapped_column(String(1000), nullable=True)
    processing_status: Mapped[ProcessingStatus] = mapped_column(
        Enum(ProcessingStatus, name="processing_status", create_constraint=True),
        default=ProcessingStatus.pending,
        server_default="pending",
    )
    # Both timestamps get a Python-side and a server-side default;
    # updated_at is additionally refreshed by _now on UPDATE.
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now(), onupdate=_now
    )

    # relationships
    creator: Mapped[Creator] = sa_relationship(back_populates="videos")
    segments: Mapped[list[TranscriptSegment]] = sa_relationship(back_populates="source_video")
    key_moments: Mapped[list[KeyMoment]] = sa_relationship(back_populates="source_video")
class TranscriptSegment(Base):
    """ORM model for the ``transcript_segments`` table.

    One row per segment of a source video's transcript.
    """

    __tablename__ = "transcript_segments"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # NOTE(review): the initial migration creates an index named
    # ix_transcript_segments_video_id on this column, but no index is
    # declared here — autogenerate may report that as drift; confirm.
    source_video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"),
        nullable=False,
    )
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    segment_index: Mapped[int] = mapped_column(Integer, nullable=False)
    topic_label: Mapped[str | None] = mapped_column(String(255), nullable=True)

    # relationships
    source_video: Mapped[SourceVideo] = sa_relationship(back_populates="segments")
class KeyMoment(Base):
    """ORM model for the ``key_moments`` table.

    A key moment belongs to one source video and may optionally be attached
    to a technique page.
    """

    __tablename__ = "key_moments"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # index=True on the two foreign keys produces ix_key_moments_source_video_id
    # and ix_key_moments_technique_page_id, the indexes the initial migration
    # creates, so the model metadata and the schema agree.
    source_video_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("source_videos.id", ondelete="CASCADE"), nullable=False, index=True
    )
    # SET NULL: the moment is kept, detached, when its technique page is deleted.
    technique_page_id: Mapped[uuid.UUID | None] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="SET NULL"), nullable=True, index=True
    )
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    summary: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    content_type: Mapped[KeyMomentContentType] = mapped_column(
        Enum(KeyMomentContentType, name="key_moment_content_type", create_constraint=True),
        nullable=False,
    )
    plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    review_status: Mapped[ReviewStatus] = mapped_column(
        Enum(ReviewStatus, name="review_status", create_constraint=True),
        default=ReviewStatus.pending,
        server_default="pending",
    )
    raw_transcript: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Both timestamps get a Python-side and a server-side default;
    # updated_at is additionally refreshed by _now on UPDATE.
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now(), onupdate=_now
    )

    # relationships
    source_video: Mapped[SourceVideo] = sa_relationship(back_populates="key_moments")
    technique_page: Mapped[TechniquePage | None] = sa_relationship(
        back_populates="key_moments", foreign_keys=[technique_page_id]
    )
class TechniquePage(Base):
    """ORM model for the ``technique_pages`` table.

    A page belongs to one creator, collects key moments, and is linked to
    other pages through ``RelatedTechniqueLink`` rows in both directions.
    """

    __tablename__ = "technique_pages"

    id: Mapped[uuid.UUID] = _uuid_pk()
    # index=True on creator_id and topic_category produces
    # ix_technique_pages_creator_id and ix_technique_pages_topic_category,
    # the indexes the initial migration creates, so the model metadata and
    # the schema agree.
    creator_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("creators.id", ondelete="CASCADE"), nullable=False, index=True
    )
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    slug: Mapped[str] = mapped_column(String(500), unique=True, nullable=False)
    topic_category: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    topic_tags: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    summary: Mapped[str | None] = mapped_column(Text, nullable=True)
    body_sections: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    signal_chains: Mapped[list | None] = mapped_column(JSONB, nullable=True)
    plugins: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)
    source_quality: Mapped[SourceQuality | None] = mapped_column(
        Enum(SourceQuality, name="source_quality", create_constraint=True),
        nullable=True,
    )
    view_count: Mapped[int] = mapped_column(Integer, default=0, server_default="0")
    review_status: Mapped[PageReviewStatus] = mapped_column(
        Enum(PageReviewStatus, name="page_review_status", create_constraint=True),
        default=PageReviewStatus.draft,
        server_default="draft",
    )
    # Both timestamps get a Python-side and a server-side default;
    # updated_at is additionally refreshed by _now on UPDATE.
    created_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        default=_now, server_default=func.now(), onupdate=_now
    )

    # relationships
    creator: Mapped[Creator] = sa_relationship(back_populates="technique_pages")
    key_moments: Mapped[list[KeyMoment]] = sa_relationship(
        back_populates="technique_page", foreign_keys=[KeyMoment.technique_page_id]
    )
    # Two foreign keys on related_technique_links point at this table, so
    # each side names the column it follows.
    outgoing_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship(
        foreign_keys="RelatedTechniqueLink.source_page_id", back_populates="source_page"
    )
    incoming_links: Mapped[list[RelatedTechniqueLink]] = sa_relationship(
        foreign_keys="RelatedTechniqueLink.target_page_id", back_populates="target_page"
    )
class RelatedTechniqueLink(Base):
    """ORM model for the ``related_technique_links`` table.

    A typed, directed link from one technique page (source) to another
    (target). A given (source, target, relationship) triple may appear at
    most once.
    """

    __tablename__ = "related_technique_links"
    __table_args__ = (
        UniqueConstraint(
            "source_page_id",
            "target_page_id",
            "relationship",
            name="uq_technique_link",
        ),
    )

    id: Mapped[uuid.UUID] = _uuid_pk()
    source_page_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="CASCADE"),
        nullable=False,
    )
    target_page_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("technique_pages.id", ondelete="CASCADE"),
        nullable=False,
    )
    # This attribute is itself called "relationship"; the ORM function of
    # the same name is imported as sa_relationship.
    relationship: Mapped[RelationshipType] = mapped_column(
        Enum(RelationshipType, name="relationship_type", create_constraint=True),
        nullable=False,
    )

    # relationships
    source_page: Mapped[TechniquePage] = sa_relationship(
        back_populates="outgoing_links",
        foreign_keys=[source_page_id],
    )
    target_page: Mapped[TechniquePage] = sa_relationship(
        back_populates="incoming_links",
        foreign_keys=[target_page_id],
    )
class Tag(Base):
    """ORM model for the ``tags`` table: a uniquely named tag with a category and optional aliases."""

    __tablename__ = "tags"

    id: Mapped[uuid.UUID] = _uuid_pk()
    name: Mapped[str] = mapped_column(String(255), unique=True, nullable=False)
    # index=True produces ix_tags_category, the index the initial migration
    # creates, so the model metadata and the schema agree.
    category: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    aliases: Mapped[list[str] | None] = mapped_column(ARRAY(String), nullable=True)

View file

@ -10,7 +10,7 @@ services:
restart: unless-stopped restart: unless-stopped
environment: environment:
POSTGRES_USER: ${POSTGRES_USER:-chrysopedia} POSTGRES_USER: ${POSTGRES_USER:-chrysopedia}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD required} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme}
POSTGRES_DB: ${POSTGRES_DB:-chrysopedia} POSTGRES_DB: ${POSTGRES_DB:-chrysopedia}
volumes: volumes:
- /vmPool/r/services/chrysopedia_db:/var/lib/postgresql/data - /vmPool/r/services/chrysopedia_db:/var/lib/postgresql/data