# chrysopedia/backend/config.py

"""Application configuration loaded from environment variables."""

from functools import lru_cache

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Typed configuration for the Chrysopedia API.

    Every attribute below is one setting with a built-in default.
    pydantic-settings overrides those defaults from environment variables or
    from a ``.env`` file (see ``model_config`` at the bottom of the class);
    variable names are matched case-insensitively.
    """

    # --- Database -----------------------------------------------------------
    # PostgreSQL connection URL (the default uses the asyncpg driver scheme).
    database_url: str = "postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia"

    # --- Redis --------------------------------------------------------------
    redis_url: str = "redis://localhost:6379/0"

    # --- Application --------------------------------------------------------
    app_env: str = "development"
    app_log_level: str = "info"
    # NOTE: the default is a placeholder and is expected to be overridden
    # through the environment.
    app_secret_key: str = "changeme-generate-a-real-secret"

    # --- CORS ---------------------------------------------------------------
    # Allowed origins; the default is a single wildcard entry.
    cors_origins: list[str] = ["*"]

    # --- LLM endpoint (OpenAI-compatible) -----------------------------------
    llm_api_url: str = "http://localhost:11434/v1"
    llm_api_key: str = "sk-placeholder"
    llm_model: str = "fyn-llm-agent-chat"
    llm_fallback_url: str = "http://localhost:11434/v1"
    llm_fallback_model: str = "fyn-llm-agent-chat"

    # --- Per-stage model overrides ------------------------------------------
    # Optional: a stage without an override is meant to fall back to
    # llm_model / "chat" (that fallback logic lives outside this class).
    #
    # Stage 2 — segmentation: mechanical work, fast chat model.
    llm_stage2_model: str | None = "fyn-llm-agent-chat"
    llm_stage2_modality: str = "chat"
    # Stage 3 — extraction: reasoning model.
    llm_stage3_model: str | None = "fyn-llm-agent-think"
    llm_stage3_modality: str = "thinking"
    # Stage 4 — classification: mechanical work, fast chat model.
    llm_stage4_model: str | None = "fyn-llm-agent-chat"
    llm_stage4_modality: str = "chat"
    # Stage 5 — synthesis: reasoning model.
    llm_stage5_model: str | None = "fyn-llm-agent-think"
    llm_stage5_modality: str = "thinking"

    # --- Dynamic token estimation -------------------------------------------
    # Each stage calculates max_tokens from its input size.
    # Hard ceiling applied to the dynamic estimator.
    llm_max_tokens_hard_limit: int = 32768
    # Value used when no estimate is provided.
    # NOTE(review): this fallback default is larger than the hard-limit
    # default above — confirm that is intended.
    llm_max_tokens: int = 65536

    # --- Stage 5 synthesis chunking -----------------------------------------
    # Maximum number of moments sent in one LLM call before the work is split.
    synthesis_chunk_size: int = 30

    # --- Embedding endpoint -------------------------------------------------
    embedding_api_url: str = "http://localhost:11434/v1"
    embedding_model: str = "nomic-embed-text"
    embedding_dimensions: int = 768

    # --- Qdrant -------------------------------------------------------------
    qdrant_url: str = "http://localhost:6333"
    qdrant_collection: str = "chrysopedia"

    # --- Prompt templates ---------------------------------------------------
    prompts_path: str = "./prompts"

    # --- Debug mode ---------------------------------------------------------
    # When True, the pipeline captures full LLM prompts and responses.
    debug_mode: bool = False

    # --- File storage -------------------------------------------------------
    transcript_storage_path: str = "/data/transcripts"
    video_metadata_path: str = "/data/video_meta"

    # --- Build metadata -----------------------------------------------------
    # Git commit SHA, set at Docker build time or through an env var.
    git_commit_sha: str = "unknown"

    # pydantic-settings loader options: read a UTF-8 ".env" file and match
    # environment variable names without regard to case.
    model_config = dict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )


@lru_cache
def get_settings() -> Settings:
    """Build the application settings once and reuse them afterwards.

    The function takes no arguments, so ``lru_cache`` memoizes a single
    ``Settings`` instance: the first call constructs it and every later call
    returns that same object.
    """
    settings = Settings()
    return settings