fix: Bump max_tokens to 65536 (model supports 94K context, extraction needs headroom)

jlightner 2026-03-30 04:57:44 +00:00
parent 6fb497d03a
commit f67e676264
2 changed files with 2 additions and 2 deletions

@@ -30,7 +30,7 @@ LLM_FALLBACK_MODEL=fyn-qwen35-chat
 #LLM_STAGE5_MODALITY=thinking
 # Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
-LLM_MAX_TOKENS=32768
+LLM_MAX_TOKENS=65536
 # Embedding endpoint (Ollama container in the compose stack)
 EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
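
For context, a minimal sketch of how an env line like this typically reaches the application, assuming the project loads it with pydantic-settings v2 into the Settings class shown in the second file below; the env_file path and everything beyond llm_max_tokens are assumptions, not code from this repo:

# Minimal sketch, assuming pydantic-settings v2; the .env path is an assumption.
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env")

    # LLM_MAX_TOKENS=65536 in the environment (or .env) maps onto this field;
    # env matching is case-insensitive by default.
    llm_max_tokens: int = 65536

settings = Settings()
print(settings.llm_max_tokens)  # 65536 unless LLM_MAX_TOKENS overrides it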

@@ -44,7 +44,7 @@ class Settings(BaseSettings):
     llm_stage5_modality: str = "chat"
     # Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
-    llm_max_tokens: int = 32768
+    llm_max_tokens: int = 65536
     # Embedding endpoint
     embedding_api_url: str = "http://localhost:11434/v1"
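
Downstream, this value caps the completion length per request. A hedged sketch of the call site, assuming the pipeline talks to an OpenAI-compatible chat endpoint; the base_url, api_key, and message content are placeholders, not the pipeline's actual code:

# Sketch only: assumes an OpenAI-compatible endpoint and the Settings
# class sketched above. Endpoint and credentials are placeholders.
from openai import OpenAI

settings = Settings()
client = OpenAI(base_url="http://localhost:8080/v1", api_key="unused")

resp = client.chat.completions.create(
    model="fyn-qwen35-chat",  # fallback model named in the env file
    messages=[{"role": "user", "content": "..."}],
    # Caps the completion at 65536 tokens instead of OpenWebUI's 1000 default.
    max_tokens=settings.llm_max_tokens,
)

Note that max_tokens bounds only the completion; the prompt plus completion must still fit the model's context window, so 65536 leaves roughly 28K tokens of prompt headroom in the 94K window the commit message cites (94000 - 65536 = 28464).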