chrysopedia/backend/config.py
jlightner 4aa4b08a7f feat: Per-stage LLM model routing with thinking modality and think-tag stripping
- Added 8 per-stage config fields: llm_stage{2-5}_model and llm_stage{2-5}_modality
- LLMClient.complete() accepts modality ('chat'/'thinking') and model_override
- Thinking modality: appends JSON instructions to system prompt, strips <think> tags
- strip_think_tags() handles multiline, multiple blocks, and edge cases
- Pipeline stages 2-5 read per-stage config and pass to LLM client
- Updated .env.example with per-stage model/modality documentation
- All 59 tests pass including new think-tag stripping test
2026-03-30 02:12:14 +00:00

75 lines
2.3 KiB
Python

"""Application configuration loaded from environment variables."""
from functools import lru_cache
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Chrysopedia API settings.

    Values are loaded from environment variables (or a ``.env`` file via
    pydantic-settings' dotenv support); matching is case-insensitive per
    ``model_config`` below. Every default here is a development fallback —
    production deployments are expected to override the secrets and URLs.
    """
    # Database — async SQLAlchemy DSN (asyncpg driver); note non-default port 5433.
    database_url: str = "postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia"
    # Redis — used as broker/cache; DB index 0.
    redis_url: str = "redis://localhost:6379/0"
    # Application
    app_env: str = "development"  # e.g. "development" / "production"
    app_log_level: str = "info"
    # NOTE(review): placeholder secret — must be overridden outside development.
    app_secret_key: str = "changeme-generate-a-real-secret"
    # CORS — wildcard default is development-only; tighten in production.
    cors_origins: list[str] = ["*"]
    # LLM endpoint (OpenAI-compatible) — defaults target a local Ollama server.
    llm_api_url: str = "http://localhost:11434/v1"
    llm_api_key: str = "sk-placeholder"  # many local servers ignore the key but require one
    llm_model: str = "qwen2.5:14b-q8_0"
    # Fallback endpoint/model used when the primary LLM endpoint fails.
    llm_fallback_url: str = "http://localhost:11434/v1"
    llm_fallback_model: str = "qwen2.5:14b-q8_0"
    # Per-stage model overrides (optional — falls back to llm_model / "chat").
    # A None model means "use llm_model"; modality is "chat" or "thinking"
    # (thinking models emit <think> tags that the LLM client strips).
    llm_stage2_model: str | None = None  # segmentation — fast chat model recommended
    llm_stage2_modality: str = "chat"  # "chat" or "thinking"
    llm_stage3_model: str | None = None  # extraction — thinking model recommended
    llm_stage3_modality: str = "chat"
    llm_stage4_model: str | None = None  # classification — fast chat model recommended
    llm_stage4_modality: str = "chat"
    llm_stage5_model: str | None = None  # synthesis — thinking model recommended
    llm_stage5_modality: str = "chat"
    # Embedding endpoint — dimensions must match the Qdrant collection schema.
    embedding_api_url: str = "http://localhost:11434/v1"
    embedding_model: str = "nomic-embed-text"
    embedding_dimensions: int = 768
    # Qdrant vector store
    qdrant_url: str = "http://localhost:6333"
    qdrant_collection: str = "chrysopedia"
    # Prompt templates — directory of prompt files loaded by the pipeline.
    prompts_path: str = "./prompts"
    # Review mode — when True, extracted moments go to review queue before publishing.
    review_mode: bool = True
    # File storage — absolute container paths for transcript/video artifacts.
    transcript_storage_path: str = "/data/transcripts"
    video_metadata_path: str = "/data/video_meta"
    # pydantic-settings configuration: read .env, ignore env-var case.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
    }
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide :class:`Settings` instance.

    The ``lru_cache`` decorator guarantees ``Settings()`` is constructed —
    and the environment/.env file read — exactly once per process; every
    subsequent call returns the same cached object.
    """
    settings = Settings()
    return settings