chrysopedia/backend/config.py
jlightner c6c15defee feat: Dynamic token estimation for per-stage max_tokens
- Add estimate_tokens() and estimate_max_tokens() to llm_client with
  stage-specific output ratios (0.3x segmentation, 1.2x extraction,
  0.15x classification, 1.5x synthesis)
- Add max_tokens override parameter to LLMClient.complete()
- Wire all 4 pipeline stages to estimate max_tokens from actual prompt
  content with 20% buffer and 2048 floor
- Add LLM_MAX_TOKENS_HARD_LIMIT=32768 config (dynamic estimator ceiling)
- Log token estimates alongside every LLM request

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-30 05:55:17 -05:00

79 lines
2.6 KiB
Python

"""Application configuration loaded from environment variables."""
from functools import lru_cache
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Typed runtime configuration for the Chrysopedia API.

    Every attribute below is a setting with a development-friendly default.
    pydantic-settings overrides those defaults from environment variables,
    and from the ``.env`` file named in ``model_config``; variable names are
    matched case-insensitively.
    """

    # --- Database -----------------------------------------------------------
    database_url: str = "postgresql+asyncpg://chrysopedia:changeme@localhost:5433/chrysopedia"

    # --- Redis --------------------------------------------------------------
    redis_url: str = "redis://localhost:6379/0"

    # --- Application --------------------------------------------------------
    app_env: str = "development"
    app_log_level: str = "info"
    # Placeholder only; a real secret is expected to be supplied per deployment.
    app_secret_key: str = "changeme-generate-a-real-secret"

    # --- CORS ---------------------------------------------------------------
    cors_origins: list[str] = ["*"]

    # --- LLM endpoint (OpenAI-compatible) -----------------------------------
    llm_api_url: str = "http://localhost:11434/v1"
    llm_api_key: str = "sk-placeholder"
    llm_model: str = "fyn-llm-agent-chat"
    llm_fallback_url: str = "http://localhost:11434/v1"
    llm_fallback_model: str = "fyn-llm-agent-chat"

    # --- Per-stage model overrides ------------------------------------------
    # Optional: a stage without an override falls back to llm_model / "chat".
    # Stage 2: segmentation (mechanical work, fast chat model).
    llm_stage2_model: str | None = "fyn-llm-agent-chat"
    llm_stage2_modality: str = "chat"
    # Stage 3: extraction (reasoning model).
    llm_stage3_model: str | None = "fyn-llm-agent-think"
    llm_stage3_modality: str = "thinking"
    # Stage 4: classification (mechanical work, fast chat model).
    llm_stage4_model: str | None = "fyn-llm-agent-chat"
    llm_stage4_modality: str = "chat"
    # Stage 5: synthesis (reasoning model).
    llm_stage5_model: str | None = "fyn-llm-agent-think"
    llm_stage5_modality: str = "thinking"

    # --- Dynamic token estimation -------------------------------------------
    # Each pipeline stage derives max_tokens from the size of its input.
    # Hard ceiling applied to the dynamic estimator.
    llm_max_tokens_hard_limit: int = 32768
    # Fallback used when no estimate is provided.
    # NOTE(review): this fallback is larger than llm_max_tokens_hard_limit;
    # confirm that is intentional.
    llm_max_tokens: int = 65536

    # --- Embedding endpoint -------------------------------------------------
    embedding_api_url: str = "http://localhost:11434/v1"
    embedding_model: str = "nomic-embed-text"
    embedding_dimensions: int = 768

    # --- Qdrant -------------------------------------------------------------
    qdrant_url: str = "http://localhost:6333"
    qdrant_collection: str = "chrysopedia"

    # --- Prompt templates ---------------------------------------------------
    prompts_path: str = "./prompts"

    # --- Review mode --------------------------------------------------------
    # When True, extracted moments go to the review queue before publishing.
    review_mode: bool = True

    # --- File storage -------------------------------------------------------
    transcript_storage_path: str = "/data/transcripts"
    video_metadata_path: str = "/data/video_meta"

    # pydantic-settings options: read a UTF-8 ".env" file and ignore the case
    # of environment variable names.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
    }
@lru_cache
def get_settings() -> Settings:
    """Build the application settings once and reuse them afterwards.

    The function takes no arguments, so ``lru_cache`` stores the first
    ``Settings`` instance and returns that same object on every later call.
    """
    settings = Settings()
    return settings