chrysopedia/backend/config.py
jlightner 6fb497d03a chore: Bump LLM max_tokens to 32768, commit M002/M003 GSD artifacts
- max_tokens bumped from 16384 to 32768 (extraction responses still hitting limits)
- All GSD planning/completion artifacts for M002 (deployment) and M003 (DNS + LLM routing)
- KNOWLEDGE.md updated with XPLTD domain setup flow and container healthcheck patterns
- DECISIONS.md updated with D015 (subnet) and D016 (Ollama for embeddings)
2026-03-30 04:22:45 +00:00

78 lines
2.5 KiB
Python

"""Application configuration loaded from environment variables."""
from functools import lru_cache
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Typed configuration for the Chrysopedia API.

    Each attribute below is a setting with a development-friendly default.
    Values are read from environment variables, or from a ``.env`` file
    through pydantic-settings' dotenv support (see ``model_config`` at the
    bottom of the class).
    """

    # ── Database ─────────────────────────────────────────────────────────
    database_url: str = "postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia"

    # ── Redis ────────────────────────────────────────────────────────────
    redis_url: str = "redis://localhost:6379/0"

    # ── Application ──────────────────────────────────────────────────────
    app_env: str = "development"
    app_log_level: str = "info"
    # Placeholder only; override it with a real secret outside development.
    app_secret_key: str = "changeme-generate-a-real-secret"

    # ── CORS ─────────────────────────────────────────────────────────────
    cors_origins: list[str] = ["*"]

    # ── LLM endpoint (OpenAI-compatible) ─────────────────────────────────
    llm_api_url: str = "http://localhost:11434/v1"
    llm_api_key: str = "sk-placeholder"
    llm_model: str = "qwen2.5:14b-q8_0"
    llm_fallback_url: str = "http://localhost:11434/v1"
    llm_fallback_model: str = "qwen2.5:14b-q8_0"

    # ── Per-stage model overrides ────────────────────────────────────────
    # Optional. A model left as None is meant to fall back to ``llm_model``;
    # the default modality is "chat" (the alternative is "thinking").
    # Stage 2: segmentation (fast chat model recommended).
    llm_stage2_model: str | None = None
    llm_stage2_modality: str = "chat"
    # Stage 3: extraction (thinking model recommended).
    llm_stage3_model: str | None = None
    llm_stage3_modality: str = "chat"
    # Stage 4: classification (fast chat model recommended).
    llm_stage4_model: str | None = None
    llm_stage4_modality: str = "chat"
    # Stage 5: synthesis (thinking model recommended).
    llm_stage5_model: str | None = None
    llm_stage5_modality: str = "chat"

    # Upper bound on tokens per LLM response. Set explicitly because
    # OpenWebUI defaults to 1000, which truncates the pipeline's JSON output.
    llm_max_tokens: int = 32768

    # ── Embedding endpoint ───────────────────────────────────────────────
    embedding_api_url: str = "http://localhost:11434/v1"
    embedding_model: str = "nomic-embed-text"
    embedding_dimensions: int = 768

    # ── Qdrant ───────────────────────────────────────────────────────────
    qdrant_url: str = "http://localhost:6333"
    qdrant_collection: str = "chrysopedia"

    # ── Prompt templates ─────────────────────────────────────────────────
    prompts_path: str = "./prompts"

    # ── Review mode ──────────────────────────────────────────────────────
    # When True, extracted moments go to the review queue before publishing.
    review_mode: bool = True

    # ── File storage ─────────────────────────────────────────────────────
    transcript_storage_path: str = "/data/transcripts"
    video_metadata_path: str = "/data/video_meta"

    # pydantic-settings options: read a UTF-8 ``.env`` file and do not treat
    # environment variable names as case-sensitive.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
    }
@lru_cache
def get_settings() -> Settings:
    """Build the application settings once and reuse them afterwards.

    ``lru_cache`` memoizes this zero-argument call, so the first invocation
    constructs a ``Settings`` object and every later invocation returns that
    same instance.
    """
    settings = Settings()
    return settings