chrysopedia/backend/schemas.py
jlightner f0f36a3f76 feat: Added MinIO Docker service, Post/PostAttachment models with migra…
- "docker-compose.yml"
- "backend/config.py"
- "backend/minio_client.py"
- "backend/models.py"
- "backend/schemas.py"
- "backend/requirements.txt"
- "docker/nginx.conf"
- "alembic/versions/024_add_posts_and_attachments.py"

GSD-Task: S01/T01
2026-04-04 09:02:40 +00:00

819 lines
25 KiB
Python

"""Pydantic schemas for the Chrysopedia API.
Read-only schemas for list/detail endpoints and input schemas for creation.
Each schema mirrors the corresponding SQLAlchemy model in models.py.
"""
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field
# ── Health ───────────────────────────────────────────────────────────────────
class HealthResponse(BaseModel):
    """Health-check payload.

    Every field has a default, so the model can be built with no arguments.
    """

    status: str = "ok"
    service: str = "chrysopedia-api"
    version: str = "0.1.0"
    # Stays "unknown" unless the caller supplies a value.
    database: str = "unknown"
# ── Creator ──────────────────────────────────────────────────────────────────
class CreatorBase(BaseModel):
    """Fields shared by the creator schemas that subclass this model."""

    name: str
    slug: str
    # Optional; None when no genres are supplied.
    genres: list[str] | None = None
    folder_name: str
class CreatorCreate(CreatorBase):
    """Input schema for creating a creator; adds no fields beyond CreatorBase."""

    pass
class CreatorRead(CreatorBase):
    """Read schema for a creator.

    Adds id, avatar URL, view count and timestamps to the CreatorBase fields.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows), not
    # only from dicts.
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    avatar_url: str | None = None
    view_count: int = 0
    created_at: datetime
    updated_at: datetime
class CreatorTechniqueItem(BaseModel):
    """Minimal technique page info for creator detail.

    title, slug, topic_category and created_at are required. summary and
    topic_tags default to None; key_moment_count defaults to 0.
    """

    title: str
    slug: str
    topic_category: str
    created_at: datetime
    summary: str | None = None
    topic_tags: list[str] | None = None
    key_moment_count: int = 0
class CreatorDetail(CreatorRead):
    """Full creator profile for landing page.

    Extends CreatorRead with profile text, flags, aggregate counters and the
    creator's technique list. Every field added here has a default, so only
    the inherited CreatorRead fields are required.
    """

    bio: str | None = None
    social_links: dict | None = None
    featured: bool = False
    video_count: int = 0
    technique_count: int = 0
    moment_count: int = 0
    follower_count: int = 0
    personality_profile: dict | None = None
    # Container defaults go through default_factory, matching the other
    # list-valued fields in this module; each instance gets a fresh container.
    techniques: list[CreatorTechniqueItem] = Field(default_factory=list)
    genre_breakdown: dict[str, int] = Field(default_factory=dict)
class CreatorProfileUpdate(BaseModel):
    """Admin update payload for creator profile fields.

    Every field is optional and defaults to None.
    """

    # NOTE(review): presumably None means "leave unchanged" — confirm
    # against the endpoint that consumes this payload.
    bio: str | None = None
    social_links: dict | None = None
    featured: bool | None = None
    avatar_url: str | None = None
# ── SourceVideo ──────────────────────────────────────────────────────────────
class SourceVideoBase(BaseModel):
    """Fields shared by the source-video schemas that subclass this model."""

    filename: str
    file_path: str
    # Optional; None when not provided.
    duration_seconds: int | None = None
    content_type: str
    # Optional; None when not provided.
    transcript_path: str | None = None
class SourceVideoCreate(SourceVideoBase):
    """Input schema for creating a source video; adds the required creator_id."""

    creator_id: uuid.UUID
class SourceVideoRead(SourceVideoBase):
    """Read schema for a source video.

    Adds ids, content hash, processing status and timestamps to the
    SourceVideoBase fields.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    creator_id: uuid.UUID
    content_hash: str | None = None
    # Falls back to "not_started" when no status is supplied.
    processing_status: str = "not_started"
    created_at: datetime
    updated_at: datetime
# ── TranscriptSegment ────────────────────────────────────────────────────────
class TranscriptSegmentBase(BaseModel):
    """Fields shared by the transcript-segment schemas that subclass this model."""

    # The time unit is not stated in this module (presumably seconds — confirm).
    start_time: float
    end_time: float
    text: str
    segment_index: int
    # Optional; None when no label is supplied.
    topic_label: str | None = None
class TranscriptSegmentCreate(TranscriptSegmentBase):
    """Input schema for creating a segment; adds the required source_video_id."""

    source_video_id: uuid.UUID
class TranscriptSegmentRead(TranscriptSegmentBase):
    """Read schema for a transcript segment: base fields plus id and video id."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    source_video_id: uuid.UUID
# ── KeyMoment ────────────────────────────────────────────────────────────────
class KeyMomentBase(BaseModel):
    """Fields shared by the key-moment schemas that subclass this model."""

    title: str
    summary: str
    # The time unit is not stated in this module (presumably seconds — confirm).
    start_time: float
    end_time: float
    content_type: str
    # Both optional; None when not provided.
    plugins: list[str] | None = None
    raw_transcript: str | None = None
class KeyMomentCreate(KeyMomentBase):
    """Input schema for creating a key moment.

    source_video_id is required; technique_page_id is optional.
    """

    source_video_id: uuid.UUID
    technique_page_id: uuid.UUID | None = None
class KeyMomentRead(KeyMomentBase):
    """Read schema for a key moment: base fields plus ids and timestamps."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    source_video_id: uuid.UUID
    # None when no technique page id is set.
    technique_page_id: uuid.UUID | None = None
    created_at: datetime
    updated_at: datetime
# ── TechniquePage ────────────────────────────────────────────────────────────
class TechniquePageBase(BaseModel):
    """Fields shared by the technique-page schemas that subclass this model."""

    title: str
    slug: str
    topic_category: str
    topic_tags: list[str] | None = None
    summary: str | None = None
    # Accepts a list, a dict, or None.
    body_sections: list | dict | None = None
    # NOTE(review): presumably identifies which shape body_sections uses —
    # confirm against the code that reads it. Defaults to "v1".
    body_sections_format: str = "v1"
    signal_chains: list | None = None
    plugins: list[str] | None = None
class TechniquePageCreate(TechniquePageBase):
    """Input schema for creating a technique page.

    creator_id is required; source_quality is optional.
    """

    creator_id: uuid.UUID
    source_quality: str | None = None
class TechniquePageRead(TechniquePageBase):
    """Read schema for a technique page.

    Adds ids, creator display fields, counters and timestamps to the
    TechniquePageBase fields.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    creator_id: uuid.UUID
    # Both default to "", so they may be left unset at validation time.
    creator_name: str = ""
    creator_slug: str = ""
    source_quality: str | None = None
    view_count: int = 0
    key_moment_count: int = 0
    created_at: datetime
    updated_at: datetime
# ── RelatedTechniqueLink ─────────────────────────────────────────────────────
class RelatedTechniqueLinkBase(BaseModel):
    """Fields shared by the related-technique-link schemas; all are required."""

    source_page_id: uuid.UUID
    target_page_id: uuid.UUID
    relationship: str
class RelatedTechniqueLinkCreate(RelatedTechniqueLinkBase):
    """Input schema for creating a link; adds no fields beyond the base."""

    pass
class RelatedTechniqueLinkRead(RelatedTechniqueLinkBase):
    """Read schema for a related-technique link: base fields plus id."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
# ── Tag ──────────────────────────────────────────────────────────────────────
class TagBase(BaseModel):
    """Fields shared by the tag schemas that subclass this model."""

    name: str
    category: str
    # Optional; None when no aliases are supplied.
    aliases: list[str] | None = None
class TagCreate(TagBase):
    """Input schema for creating a tag; adds no fields beyond TagBase."""

    pass
class TagRead(TagBase):
    """Read schema for a tag: TagBase fields plus id."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
# ── Transcript Ingestion ─────────────────────────────────────────────────────
class TranscriptIngestResponse(BaseModel):
    """Response returned after successfully ingesting a transcript.

    All fields are required; none has a default.
    """

    video_id: uuid.UUID
    creator_id: uuid.UUID
    creator_name: str
    filename: str
    segments_stored: int
    processing_status: str
    is_reupload: bool
    content_hash: str
# ── Pagination wrapper ───────────────────────────────────────────────────────
class PaginatedResponse(BaseModel):
    """Generic paginated list response.

    Defaults describe an empty first page: no items, total 0, offset 0,
    limit 50.
    """

    # Untyped list: element types are not validated by this schema.
    items: list = Field(default_factory=list)
    total: int = 0
    offset: int = 0
    limit: int = 50
# ── Search ───────────────────────────────────────────────────────────────────
class SearchResultItem(BaseModel):
    """A single search result.

    Only title is required. Every other string field defaults to "", score
    defaults to 0.0 and topic_tags to an empty list.
    """

    title: str
    slug: str = ""
    technique_page_slug: str = ""
    type: str = ""
    score: float = 0.0
    summary: str = ""
    creator_name: str = ""
    creator_slug: str = ""
    topic_category: str = ""
    topic_tags: list[str] = Field(default_factory=list)
    match_context: str = ""
    section_anchor: str = ""
    section_heading: str = ""
class SearchResponse(BaseModel):
    """Top-level search response with metadata.

    Every field has a default, so an empty response can be built with no
    arguments.
    """

    items: list[SearchResultItem] = Field(default_factory=list)
    # A second result list, kept separate from items.
    partial_matches: list[SearchResultItem] = Field(default_factory=list)
    total: int = 0
    query: str = ""
    fallback_used: bool = False
    cascade_tier: str = ""
class SuggestionItem(BaseModel):
    """A single autocomplete suggestion."""

    text: str
    # Restricted to exactly these three values; anything else fails validation.
    type: Literal["topic", "technique", "creator"]
class SuggestionsResponse(BaseModel):
    """Popular search suggestions for autocomplete.

    suggestions defaults to an empty list.
    """

    suggestions: list[SuggestionItem] = Field(default_factory=list)
class PopularSearchItem(BaseModel):
    """A single popular search query with occurrence count.

    Both fields are required.
    """

    query: str
    count: int
class PopularSearchesResponse(BaseModel):
    """Response for the popular searches endpoint."""

    items: list[PopularSearchItem] = Field(default_factory=list)
    # Defaults to False.
    # NOTE(review): presumably set when served from a cache — confirm.
    cached: bool = False
# ── Technique Page Detail ────────────────────────────────────────────────────
class KeyMomentSummary(BaseModel):
    """Lightweight key moment for technique page detail."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    title: str
    summary: str
    start_time: float
    end_time: float
    content_type: str
    plugins: list[str] | None = None
    # Optional here, whereas KeyMomentRead declares it as required.
    source_video_id: uuid.UUID | None = None
    # Defaults to "" when no filename is supplied.
    video_filename: str = ""
class RelatedLinkItem(BaseModel):
    """A related technique link with target info.

    Every field is a string defaulting to "".
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    target_title: str = ""
    target_slug: str = ""
    relationship: str = ""
    creator_name: str = ""
    topic_category: str = ""
    reason: str = ""
class CreatorInfo(BaseModel):
    """Minimal creator info embedded in technique detail.

    name and slug are required; genres and avatar_url default to None.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    name: str
    slug: str
    genres: list[str] | None = None
    avatar_url: str | None = None
class SourceVideoSummary(BaseModel):
    """Lightweight source video info for technique page detail."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    filename: str
    content_type: str
    # Optional; None when no timestamp is supplied.
    added_at: datetime | None = None
class TechniquePageDetail(TechniquePageRead):
    """Technique page with nested key moments, creator, and related links.

    Every field added here has a default (empty list, None or 0), so only
    the inherited TechniquePageRead fields are required.
    """

    key_moments: list[KeyMomentSummary] = Field(default_factory=list)
    creator_info: CreatorInfo | None = None
    related_links: list[RelatedLinkItem] = Field(default_factory=list)
    version_count: int = 0
    source_videos: list[SourceVideoSummary] = Field(default_factory=list)
# ── Technique Page Versions ──────────────────────────────────────────────────
class TechniquePageVersionSummary(BaseModel):
    """Lightweight version entry for list responses.

    Unlike TechniquePageVersionDetail, it carries no content_snapshot.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    version_number: int
    created_at: datetime
    pipeline_metadata: dict | None = None
class TechniquePageVersionDetail(BaseModel):
    """Full version snapshot for detail responses."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    version_number: int
    # Required, untyped dict; its internal layout is not validated here.
    content_snapshot: dict
    pipeline_metadata: dict | None = None
    created_at: datetime
class TechniquePageVersionListResponse(BaseModel):
    """Response for version list endpoint.

    Holds the version summaries and a total count; both have defaults.
    """

    items: list[TechniquePageVersionSummary] = Field(default_factory=list)
    total: int = 0
# ── Topics ───────────────────────────────────────────────────────────────────
class TopicSubTopic(BaseModel):
    """A sub-topic with aggregated counts.

    name is required; both counts default to 0.
    """

    name: str
    technique_count: int = 0
    creator_count: int = 0
class TopicCategory(BaseModel):
    """A top-level topic category with sub-topics.

    name is required; description defaults to "" and sub_topics to an empty
    list.
    """

    name: str
    description: str = ""
    sub_topics: list[TopicSubTopic] = Field(default_factory=list)
class TopicListResponse(BaseModel):
    """List of topic categories with a total count.

    This schema carries only items and total; it has no offset/limit fields.
    """

    items: list[TopicCategory] = Field(default_factory=list)
    total: int = 0
class SourceVideoDetail(SourceVideoRead):
    """Single video detail with creator info for player page.

    Adds creator display fields and an optional video URL to SourceVideoRead.
    """

    # Both default to "" when no creator info is supplied.
    creator_name: str = ""
    creator_slug: str = ""
    # None when no URL is supplied.
    video_url: str | None = None
class TranscriptForPlayerResponse(BaseModel):
    """Transcript segments for the video player sidebar.

    video_id is required; segments defaults to an empty list and total to 0.
    """

    video_id: uuid.UUID
    segments: list[TranscriptSegmentRead] = Field(default_factory=list)
    total: int = 0
class VideoListResponse(BaseModel):
    """Paginated list of source videos.

    Defaults describe an empty first page: no items, total 0, offset 0,
    limit 50.
    """

    items: list[SourceVideoRead] = Field(default_factory=list)
    total: int = 0
    offset: int = 0
    limit: int = 50
# ── Creator Browse ───────────────────────────────────────────────────────────
class CreatorBrowseItem(CreatorRead):
    """Creator with technique and video counts for browse pages.

    Both counts default to 0.
    """

    technique_count: int = 0
    video_count: int = 0
    # Optional; None when no timestamp is supplied.
    last_technique_at: datetime | None = None
# ── Content Reports ──────────────────────────────────────────────────────────
class ContentReportCreate(BaseModel):
    """Public submission: report a content issue.

    content_type, report_type and description are required, and description
    must be 10-2000 characters long. The remaining fields default to None.
    """

    content_type: str = Field(
        default=...,
        description="Entity type: technique_page, key_moment, creator, general",
    )
    content_id: uuid.UUID | None = Field(
        default=None,
        description="ID of the reported entity (null for general reports)",
    )
    content_title: str | None = Field(
        default=None,
        description="Title of the reported content (for display context)",
    )
    report_type: str = Field(
        default=...,
        description="inaccurate, missing_info, wrong_attribution, formatting, other",
    )
    description: str = Field(
        default=...,
        min_length=10,
        max_length=2000,
        description="Description of the issue",
    )
    page_url: str | None = Field(
        default=None,
        description="URL the user was on when reporting",
    )
class ContentReportRead(BaseModel):
    """Full report for admin views."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    content_type: str
    content_id: uuid.UUID | None = None
    content_title: str | None = None
    report_type: str
    description: str
    # Falls back to "open" when no status is supplied.
    status: str = "open"
    admin_notes: str | None = None
    page_url: str | None = None
    created_at: datetime
    # Optional; None when no resolution timestamp is set.
    resolved_at: datetime | None = None
class ContentReportUpdate(BaseModel):
    """Admin update: change status and/or add notes.

    Both fields are optional and default to None; admin_notes is capped at
    2000 characters.
    """

    status: str | None = Field(
        default=None,
        description="open, acknowledged, resolved, dismissed",
    )
    admin_notes: str | None = Field(
        default=None,
        max_length=2000,
        description="Admin notes about resolution",
    )
class ContentReportListResponse(BaseModel):
    """Paginated list of content reports.

    Defaults describe an empty first page: no items, total 0, offset 0,
    limit 50.
    """

    items: list[ContentReportRead] = Field(default_factory=list)
    total: int = 0
    offset: int = 0
    limit: int = 50
# ── Pipeline Debug Mode ─────────────────────────────────────────────────────
class DebugModeResponse(BaseModel):
    """Current debug mode status; the single flag is required."""

    debug_mode: bool
class DebugModeUpdate(BaseModel):
    """Toggle debug mode on/off; the single flag is required."""

    debug_mode: bool
class TokenStageSummary(BaseModel):
    """Per-stage token usage aggregation.

    All fields are required.
    """

    stage: str
    call_count: int
    total_prompt_tokens: int
    total_completion_tokens: int
    # NOTE(review): presumably prompt + completion tokens — this schema
    # does not enforce that relationship.
    total_tokens: int
class TokenSummaryResponse(BaseModel):
    """Token usage summary for a video, broken down by stage."""

    # Typed as str here, whereas most other schemas in this module use
    # uuid.UUID for video ids.
    video_id: str
    stages: list[TokenStageSummary] = Field(default_factory=list)
    # Required; it is not derived from stages by this schema.
    grand_total_tokens: int
# ── Admin: Technique Pages ───────────────────────────────────────────────────
class AdminTechniquePageItem(BaseModel):
    """Technique page with aggregated source/version counts for admin view.

    Both counts default to 0; every other field is required.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    title: str
    slug: str
    creator_name: str
    creator_slug: str
    topic_category: str
    body_sections_format: str
    source_video_count: int = 0
    version_count: int = 0
    created_at: datetime
    updated_at: datetime
class AdminTechniquePageListResponse(BaseModel):
    """Paginated list of technique pages for admin view.

    Defaults describe an empty first page: no items, total 0, offset 0,
    limit 50.
    """

    items: list[AdminTechniquePageItem] = Field(default_factory=list)
    total: int = 0
    offset: int = 0
    limit: int = 50
# ── Auth ─────────────────────────────────────────────────────────────────────
class RegisterRequest(BaseModel):
    """Registration payload — requires a valid invite code.

    email, password, display_name and invite_code are required and
    length-bounded; creator_slug is optional (None by default).
    """

    email: str = Field(default=..., min_length=3, max_length=255)
    password: str = Field(default=..., min_length=8, max_length=128)
    display_name: str = Field(default=..., min_length=1, max_length=255)
    invite_code: str = Field(default=..., min_length=1, max_length=100)
    creator_slug: str | None = Field(default=None, max_length=255)
class LoginRequest(BaseModel):
    """Login payload.

    Both fields are required plain strings. Unlike RegisterRequest, no
    length bounds are applied here.
    """

    email: str
    password: str
class TokenResponse(BaseModel):
    """JWT token response."""

    access_token: str
    # Defaults to "bearer".
    token_type: str = "bearer"
class UserResponse(BaseModel):
    """Public user profile response."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    email: str
    display_name: str
    role: str
    # Optional; None when no creator id is set.
    creator_id: uuid.UUID | None = None
    is_active: bool = True
    created_at: datetime
    # Defaults to False.
    impersonating: bool = False
class UpdateProfileRequest(BaseModel):
    """Self-service profile update.

    Every field is optional and defaults to None. When supplied,
    display_name must be 1-255 characters and new_password 8-128 characters.
    """

    display_name: str | None = Field(default=None, min_length=1, max_length=255)
    current_password: str | None = None
    new_password: str | None = Field(default=None, min_length=8, max_length=128)
# ── Consent ──────────────────────────────────────────────────────────────────
class VideoConsentUpdate(BaseModel):
    """Partial update — only non-None fields trigger changes.

    All three flags are optional and default to None.
    """

    kb_inclusion: bool | None = None
    training_usage: bool | None = None
    public_display: bool | None = None
class VideoConsentRead(BaseModel):
    """Current consent state for a video."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    source_video_id: uuid.UUID
    video_filename: str = ""
    creator_id: uuid.UUID
    # Flag defaults differ: kb_inclusion and training_usage default to False,
    # public_display defaults to True.
    kb_inclusion: bool = False
    training_usage: bool = False
    public_display: bool = True
    updated_at: datetime
class ConsentAuditEntry(BaseModel):
    """Single audit trail entry for a consent change."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    version: int
    field_name: str
    # old_value may be None; new_value is always required.
    old_value: bool | None = None
    new_value: bool
    changed_by: uuid.UUID
    created_at: datetime
class ConsentListResponse(BaseModel):
    """List of video consent records with a total count.

    This schema carries only items and total; it has no offset/limit fields.
    """

    items: list[VideoConsentRead] = Field(default_factory=list)
    total: int = 0
class ConsentSummary(BaseModel):
    """Aggregate consent flag counts across all videos.

    Every counter defaults to 0.
    """

    total_videos: int = 0
    kb_inclusion_granted: int = 0
    training_usage_granted: int = 0
    public_display_granted: int = 0
# ── Creator Dashboard ────────────────────────────────────────────────────────
class CreatorDashboardTechnique(BaseModel):
    """Technique page summary for creator dashboard.

    key_moment_count defaults to 0; the other fields are required.
    """

    title: str
    slug: str
    topic_category: str
    created_at: datetime
    key_moment_count: int = 0
class CreatorDashboardVideo(BaseModel):
    """Source video summary for creator dashboard.

    All three fields are required.
    """

    filename: str
    processing_status: str
    created_at: datetime
class CreatorDashboardStats(BaseModel):
    """Aggregate counts for dashboard header.

    Every counter defaults to 0.
    """

    video_count: int = 0
    technique_count: int = 0
    key_moment_count: int = 0
    search_impressions: int = 0
class CreatorDashboardResponse(BaseModel):
    """Full creator dashboard payload.

    Every field has a default: counters are 0 and lists are empty.
    """

    # These four counters repeat the fields of CreatorDashboardStats as flat
    # top-level fields rather than nesting that model.
    video_count: int = 0
    technique_count: int = 0
    key_moment_count: int = 0
    search_impressions: int = 0
    techniques: list[CreatorDashboardTechnique] = Field(default_factory=list)
    videos: list[CreatorDashboardVideo] = Field(default_factory=list)
# ── Chapter Markers (for media player timeline) ─────────────────────────────
class ChapterMarkerRead(BaseModel):
    """A chapter marker derived from a KeyMoment for the player timeline."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    title: str
    start_time: float
    end_time: float
    content_type: str
    # Falls back to "draft" when no status is supplied.
    chapter_status: str = "draft"
    sort_order: int = 0
class ChaptersResponse(BaseModel):
    """Chapters (KeyMoments) for a video, sorted by start_time.

    The ordering is the producer's responsibility; this schema does not sort
    or validate the order of chapters.
    """

    video_id: uuid.UUID
    chapters: list[ChapterMarkerRead] = Field(default_factory=list)
# ── Creator Chapter Management ──────────────────────────────────────────────
class ChapterUpdate(BaseModel):
    """Partial update for a single chapter.

    Every field is optional and defaults to None.
    """

    title: str | None = None
    start_time: float | None = None
    end_time: float | None = None
    chapter_status: str | None = None
class ChapterReorderItem(BaseModel):
    """One entry of a reorder request: an id paired with its sort_order."""

    id: uuid.UUID
    sort_order: int
class ChapterReorderRequest(BaseModel):
    """Reorder chapters for a video.

    chapters is required (no default); each entry pairs an id with a
    sort_order.
    """

    chapters: list[ChapterReorderItem]
class ChapterBulkApproveRequest(BaseModel):
    """Bulk-approve chapters by IDs.

    chapter_ids is required (no default).
    """

    chapter_ids: list[uuid.UUID]
# ── Follow System ────────────────────────────────────────────────────────────
class FollowResponse(BaseModel):
    """Response after follow/unfollow action.

    All three fields are required.
    """

    followed: bool
    creator_id: uuid.UUID
    follower_count: int
class FollowStatusResponse(BaseModel):
    """Whether the current user is following a creator.

    Both fields are required.
    """

    following: bool
    creator_id: uuid.UUID
class FollowedCreatorItem(BaseModel):
    """A creator the current user follows.

    All four fields are required.
    """

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    creator_id: uuid.UUID
    creator_name: str
    creator_slug: str
    followed_at: datetime
# ── Personality Profile (LLM output validation) ─────────────────────────────
class VocabularyProfile(BaseModel):
    """Vocabulary section of a personality profile.

    Every field has a default: the lists start empty and jargon_level is
    "mixed".
    """

    # List defaults go through default_factory, matching the other
    # list-valued fields in this module; each instance gets a fresh list.
    signature_phrases: list[str] = Field(default_factory=list)
    jargon_level: str = "mixed"
    filler_words: list[str] = Field(default_factory=list)
    distinctive_terms: list[str] = Field(default_factory=list)
    sound_descriptions: list[str] = Field(default_factory=list)
class ToneProfile(BaseModel):
    """Tone section of a personality profile.

    Every field has a default: formality "conversational", energy
    "moderate", humor "none", teaching_style "" and no descriptors.
    """

    formality: str = "conversational"
    energy: str = "moderate"
    humor: str = "none"
    teaching_style: str = ""
    # default_factory matches the other list-valued fields in this module.
    descriptors: list[str] = Field(default_factory=list)
class StyleMarkersProfile(BaseModel):
    """Style-markers section of a personality profile.

    Every field has a default, so the model can be built with no arguments.
    """

    explanation_approach: str = "step-by-step"
    uses_analogies: bool = False
    # List defaults go through default_factory, matching the other
    # list-valued fields in this module; each instance gets a fresh list.
    analogy_examples: list[str] = Field(default_factory=list)
    sound_words: list[str] = Field(default_factory=list)
    self_references: str = ""
    audience_engagement: str = ""
    pacing: str = "moderate"
class PersonalityProfile(BaseModel):
    """Validates LLM-generated personality profile before storage.

    Each nested section falls back to a default-constructed sub-model when
    omitted, and summary defaults to "".
    """

    vocabulary: VocabularyProfile = Field(default_factory=VocabularyProfile)
    tone: ToneProfile = Field(default_factory=ToneProfile)
    style_markers: StyleMarkersProfile = Field(default_factory=StyleMarkersProfile)
    summary: str = ""
# ── Posts (Creator content feed) ─────────────────────────────────────────────
class PostAttachmentRead(BaseModel):
    """Read schema for a file attachment on a post."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    filename: str
    content_type: str
    size_bytes: int
    # Optional; None when no URL is supplied.
    download_url: str | None = None
    created_at: datetime
class PostCreate(BaseModel):
    """Create a new post.

    title (1-500 characters) and body_json are required; is_published
    defaults to False.
    """

    title: str = Field(default=..., min_length=1, max_length=500)
    # Untyped dict; its internal layout is not validated by this schema.
    body_json: dict
    is_published: bool = False
class PostUpdate(BaseModel):
    """Partial update for an existing post.

    Every field is optional and defaults to None; a supplied title must be
    1-500 characters.
    """

    title: str | None = Field(default=None, min_length=1, max_length=500)
    body_json: dict | None = None
    is_published: bool | None = None
class PostRead(BaseModel):
    """Full post with attachments."""

    # Allow validation from attribute-bearing objects (e.g. ORM rows).
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    creator_id: uuid.UUID
    title: str
    # Untyped dict; its internal layout is not validated by this schema.
    body_json: dict
    is_published: bool = False
    created_at: datetime
    updated_at: datetime
    # Defaults to an empty list when no attachments are supplied.
    attachments: list[PostAttachmentRead] = Field(default_factory=list)
class PostListResponse(BaseModel):
    """List of posts with a total count.

    This schema carries only items and total; it has no offset/limit fields.
    """

    items: list[PostRead] = Field(default_factory=list)
    total: int = 0