Auto-mode commit 7aa33cd accidentally deleted 78 files (14,814 lines) during M005
execution. Subsequent commits rebuilt some frontend files but backend/, alembic/,
tests/, whisper/, docker configs, and prompts were never restored in this repo.
This commit restores the full project tree by syncing from ub01's working directory,
which has all M001-M007 features running in production containers.
Restored: backend/ (config, models, routers, database, redis, search_service, worker),
alembic/ (6 migrations), docker/ (Dockerfiles, nginx, compose), prompts/ (4 stages),
tests/, whisper/, README.md, .env.example, chrysopedia-spec.md
99 lines
4.1 KiB
Python
99 lines
4.1 KiB
Python
"""Pydantic schemas for pipeline stage inputs and outputs.

Stage 2 — Segmentation: groups transcript segments by topic.
Stage 3 — Extraction: extracts key moments from segments.
Stage 4 — Classification: classifies moments by category/tags.
Stage 5 — Synthesis: generates technique pages from classified moments.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# ── Stage 2: Segmentation ───────────────────────────────────────────────────
|
|
|
|
class TopicSegment(BaseModel):
    """One run of contiguous transcript segments that share a topic (stage 2).

    The run is addressed by transcript segment indices, with ``end_index``
    inclusive. Every field is required.
    """

    start_index: int = Field(
        ...,
        description="First transcript segment index in this group",
    )
    end_index: int = Field(
        ...,
        description="Last transcript segment index in this group (inclusive)",
    )
    topic_label: str = Field(
        ...,
        description="Short label describing the topic",
    )
    summary: str = Field(
        ...,
        description="Brief summary of what is discussed",
    )
|
|
|
|
|
|
class SegmentationResult(BaseModel):
    """Top-level container for the complete stage 2 (segmentation) output."""

    # Required: the topic groups produced by segmentation.
    segments: list[TopicSegment]
|
|
|
|
|
|
# ── Stage 3: Extraction ─────────────────────────────────────────────────────
|
|
|
|
class ExtractedMoment(BaseModel):
    """One key moment pulled out of a topic segment group (stage 3).

    ``title``, ``summary``, ``start_time``, ``end_time`` and ``content_type``
    are required. ``plugins`` falls back to a fresh empty list and
    ``raw_transcript`` to an empty string when omitted.
    """

    title: str = Field(
        ...,
        description="Concise title for the moment",
    )
    summary: str = Field(
        ...,
        description="Detailed summary of the technique/concept",
    )
    start_time: float = Field(
        ...,
        description="Start time in seconds",
    )
    end_time: float = Field(
        ...,
        description="End time in seconds",
    )
    # Declared as a plain string; the description lists the expected values
    # but nothing here enforces them.
    content_type: str = Field(
        ...,
        description="One of: technique, settings, reasoning, workflow",
    )
    plugins: list[str] = Field(
        description="Plugins/tools mentioned",
        default_factory=list,
    )
    raw_transcript: str = Field(
        "",
        description="Raw transcript text for this moment",
    )
|
|
|
|
|
|
class ExtractionResult(BaseModel):
    """Top-level container for the complete stage 3 (extraction) output."""

    # Required: the key moments produced by extraction.
    moments: list[ExtractedMoment]
|
|
|
|
|
|
# ── Stage 4: Classification ─────────────────────────────────────────────────
|
|
|
|
class ClassifiedMoment(BaseModel):
    """Classification metadata attached to one extracted moment (stage 4).

    ``moment_index`` and ``topic_category`` are required. ``topic_tags``
    falls back to a fresh empty list and ``content_type_override`` to
    ``None`` when omitted.
    """

    moment_index: int = Field(
        ...,
        description="Index into ExtractionResult.moments",
    )
    topic_category: str = Field(
        ...,
        description="High-level topic category",
    )
    topic_tags: list[str] = Field(
        description="Specific topic tags",
        default_factory=list,
    )
    content_type_override: str | None = Field(
        None,
        description="Override for content_type if classification disagrees with extraction",
    )
|
|
|
|
|
|
class ClassificationResult(BaseModel):
    """Top-level container for the complete stage 4 (classification) output."""

    # Required: the per-moment classification records.
    classifications: list[ClassifiedMoment]
|
|
|
|
|
|
# ── Stage 5: Synthesis ───────────────────────────────────────────────────────
|
|
|
|
class SynthesizedPage(BaseModel):
    """One technique page synthesized from classified moments (stage 5).

    ``title``, ``slug``, ``topic_category`` and ``summary`` are required.
    The collection fields fall back to fresh empty containers when omitted,
    and ``source_quality`` falls back to ``"mixed"``.
    """

    title: str = Field(
        ...,
        description="Page title",
    )
    slug: str = Field(
        ...,
        description="URL-safe slug",
    )
    topic_category: str = Field(
        ...,
        description="Primary topic category",
    )
    topic_tags: list[str] = Field(
        description="Associated tags",
        default_factory=list,
    )
    summary: str = Field(
        ...,
        description="Page summary / overview paragraph",
    )
    # Untyped dict: neither keys nor values are constrained by the schema.
    body_sections: dict = Field(
        description="Structured body content as section_name -> content mapping",
        default_factory=dict,
    )
    signal_chains: list[dict] = Field(
        description="Signal chain descriptions (for audio/music production contexts)",
        default_factory=list,
    )
    plugins: list[str] = Field(
        description="Plugins/tools referenced",
        default_factory=list,
    )
    # Declared as a plain string; the description lists the expected values
    # but nothing here enforces them.
    source_quality: str = Field(
        "mixed",
        description="One of: structured, mixed, unstructured",
    )
|
|
|
|
|
|
class SynthesisResult(BaseModel):
    """Top-level container for the complete stage 5 (synthesis) output."""

    # Required: the technique pages produced by synthesis.
    pages: list[SynthesizedPage]
|