- "backend/models.py" - "backend/database.py" - "alembic/versions/001_initial.py" - "alembic/env.py" - "alembic.ini" - "alembic/script.py.mako" - "docker-compose.yml" - ".gsd/KNOWLEDGE.md" GSD-Task: S01/T02
171 lines
9.2 KiB
Python
171 lines
9.2 KiB
Python
"""initial schema — 7 core entities

Revision ID: 001_initial
Revises:
Create Date: 2026-03-29
"""
|
|
from typing import Sequence, Union
|
|
|
|
from alembic import op
|
|
import sqlalchemy as sa
|
|
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
|
|
|
|
# Revision identifiers, used by Alembic to locate this script in the
# migration graph.
revision: str = "001_initial"
# No parent revision ("Revises:" is empty in the module docstring), so this
# script is the base of the migration chain.
down_revision: Union[str, None] = None
# This revision carries no branch labels and depends on no other revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
|
|
|
|
|
def _uuid_pk() -> "sa.Column":
    """Return a new ``id`` primary-key column shared by every table here.

    The column is a PostgreSQL ``UUID`` (``as_uuid=True``) whose value is
    filled in by the server through ``gen_random_uuid()``. A fresh ``Column``
    object is built on every call because a column can belong to one table
    only.

    NOTE(review): ``gen_random_uuid()`` is built in on PostgreSQL 13+; older
    servers need the ``pgcrypto`` extension -- confirm the target version.
    """
    return sa.Column(
        "id",
        UUID(as_uuid=True),
        primary_key=True,
        server_default=sa.text("gen_random_uuid()"),
    )


def _timestamps() -> "tuple[sa.Column, sa.Column]":
    """Return new ``created_at`` / ``updated_at`` columns, in that order.

    Both are non-nullable ``DateTime`` columns with a server-side default of
    ``now()``. Callers unpack the pair at the end of a table's column list.
    """
    return (
        sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
    )


def upgrade() -> None:
    """Create the initial schema: seven tables plus their indexes.

    Tables are created in dependency order so that every foreign key points
    at a table that already exists: ``creators``, ``source_videos``,
    ``transcript_segments``, ``technique_pages``, ``key_moments``,
    ``related_technique_links`` and ``tags``.
    """
    # ── Enum types ───────────────────────────────────────────────────────
    # Declared once here and passed as the column type where they are used.
    content_type = sa.Enum(
        "tutorial", "livestream", "breakdown", "short_form",
        name="content_type",
    )
    processing_status = sa.Enum(
        "pending", "transcribed", "extracted", "reviewed", "published",
        name="processing_status",
    )
    key_moment_content_type = sa.Enum(
        "technique", "settings", "reasoning", "workflow",
        name="key_moment_content_type",
    )
    review_status = sa.Enum(
        "pending", "approved", "edited", "rejected",
        name="review_status",
    )
    source_quality = sa.Enum(
        "structured", "mixed", "unstructured",
        name="source_quality",
    )
    page_review_status = sa.Enum(
        "draft", "reviewed", "published",
        name="page_review_status",
    )
    relationship_type = sa.Enum(
        "same_technique_other_creator", "same_creator_adjacent", "general_cross_reference",
        name="relationship_type",
    )

    # ── creators ─────────────────────────────────────────────────────────
    op.create_table(
        "creators",
        _uuid_pk(),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("slug", sa.String(255), nullable=False, unique=True),
        sa.Column("genres", ARRAY(sa.String), nullable=True),
        sa.Column("folder_name", sa.String(255), nullable=False),
        sa.Column("view_count", sa.Integer, nullable=False, server_default="0"),
        *_timestamps(),
    )

    # ── source_videos ────────────────────────────────────────────────────
    op.create_table(
        "source_videos",
        _uuid_pk(),
        sa.Column(
            "creator_id",
            UUID(as_uuid=True),
            sa.ForeignKey("creators.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("filename", sa.String(500), nullable=False),
        sa.Column("file_path", sa.String(1000), nullable=False),
        sa.Column("duration_seconds", sa.Integer, nullable=True),
        sa.Column("content_type", content_type, nullable=False),
        sa.Column("transcript_path", sa.String(1000), nullable=True),
        sa.Column("processing_status", processing_status, nullable=False, server_default="pending"),
        *_timestamps(),
    )
    op.create_index("ix_source_videos_creator_id", "source_videos", ["creator_id"])

    # ── transcript_segments ──────────────────────────────────────────────
    op.create_table(
        "transcript_segments",
        _uuid_pk(),
        sa.Column(
            "source_video_id",
            UUID(as_uuid=True),
            sa.ForeignKey("source_videos.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("start_time", sa.Float, nullable=False),
        sa.Column("end_time", sa.Float, nullable=False),
        sa.Column("text", sa.Text, nullable=False),
        sa.Column("segment_index", sa.Integer, nullable=False),
        sa.Column("topic_label", sa.String(255), nullable=True),
    )
    op.create_index("ix_transcript_segments_video_id", "transcript_segments", ["source_video_id"])

    # ── technique_pages (must come before key_moments due to FK) ─────────
    op.create_table(
        "technique_pages",
        _uuid_pk(),
        sa.Column(
            "creator_id",
            UUID(as_uuid=True),
            sa.ForeignKey("creators.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("title", sa.String(500), nullable=False),
        sa.Column("slug", sa.String(500), nullable=False, unique=True),
        sa.Column("topic_category", sa.String(255), nullable=False),
        sa.Column("topic_tags", ARRAY(sa.String), nullable=True),
        sa.Column("summary", sa.Text, nullable=True),
        sa.Column("body_sections", JSONB, nullable=True),
        sa.Column("signal_chains", JSONB, nullable=True),
        sa.Column("plugins", ARRAY(sa.String), nullable=True),
        sa.Column("source_quality", source_quality, nullable=True),
        sa.Column("view_count", sa.Integer, nullable=False, server_default="0"),
        sa.Column("review_status", page_review_status, nullable=False, server_default="draft"),
        *_timestamps(),
    )
    op.create_index("ix_technique_pages_creator_id", "technique_pages", ["creator_id"])
    op.create_index("ix_technique_pages_topic_category", "technique_pages", ["topic_category"])

    # ── key_moments ──────────────────────────────────────────────────────
    op.create_table(
        "key_moments",
        _uuid_pk(),
        sa.Column(
            "source_video_id",
            UUID(as_uuid=True),
            sa.ForeignKey("source_videos.id", ondelete="CASCADE"),
            nullable=False,
        ),
        # Unlike the other foreign keys, deleting the page keeps the moment
        # and just clears the reference (SET NULL on a nullable column).
        sa.Column(
            "technique_page_id",
            UUID(as_uuid=True),
            sa.ForeignKey("technique_pages.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("title", sa.String(500), nullable=False),
        sa.Column("summary", sa.Text, nullable=False),
        sa.Column("start_time", sa.Float, nullable=False),
        sa.Column("end_time", sa.Float, nullable=False),
        sa.Column("content_type", key_moment_content_type, nullable=False),
        sa.Column("plugins", ARRAY(sa.String), nullable=True),
        sa.Column("review_status", review_status, nullable=False, server_default="pending"),
        sa.Column("raw_transcript", sa.Text, nullable=True),
        *_timestamps(),
    )
    op.create_index("ix_key_moments_source_video_id", "key_moments", ["source_video_id"])
    op.create_index("ix_key_moments_technique_page_id", "key_moments", ["technique_page_id"])

    # ── related_technique_links ──────────────────────────────────────────
    op.create_table(
        "related_technique_links",
        _uuid_pk(),
        sa.Column(
            "source_page_id",
            UUID(as_uuid=True),
            sa.ForeignKey("technique_pages.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "target_page_id",
            UUID(as_uuid=True),
            sa.ForeignKey("technique_pages.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("relationship", relationship_type, nullable=False),
        # At most one link of a given relationship per (source, target) pair.
        sa.UniqueConstraint("source_page_id", "target_page_id", "relationship", name="uq_technique_link"),
    )

    # ── tags ─────────────────────────────────────────────────────────────
    op.create_table(
        "tags",
        _uuid_pk(),
        sa.Column("name", sa.String(255), nullable=False, unique=True),
        sa.Column("category", sa.String(255), nullable=False),
        sa.Column("aliases", ARRAY(sa.String), nullable=True),
    )
    op.create_index("ix_tags_category", "tags", ["category"])
|
|
|
|
|
|
def downgrade() -> None:
    """Revert the initial schema.

    Drops the seven tables in the reverse of the order ``upgrade()`` creates
    them, so a table is always removed before the tables its foreign keys
    point at, and then drops each named enum type.
    """
    # Order matters: dependants first, ``creators`` last.
    for table_name in (
        "tags",
        "related_technique_links",
        "key_moments",
        "technique_pages",
        "transcript_segments",
        "source_videos",
        "creators",
    ):
        op.drop_table(table_name)

    # Drop enum types. The bind is looked up once rather than once per type.
    bind = op.get_bind()
    for enum_name in (
        "relationship_type", "page_review_status", "source_quality",
        "review_status", "key_moment_content_type", "processing_status",
        "content_type",
    ):
        # checkfirst=True skips types that are already absent.
        sa.Enum(name=enum_name).drop(bind, checkfirst=True)
|