From cf759f37394f95525716654f6b1fbe04ba221d8c Mon Sep 17 00:00:00 2001
From: jlightner
Date: Mon, 30 Mar 2026 04:08:29 +0000
Subject: [PATCH] fix: Add max_tokens=16384 to LLM requests (OpenWebUI defaults
 to 1000, truncating pipeline JSON)

---
 .env.example                   | 3 +++
 backend/config.py              | 3 +++
 backend/pipeline/llm_client.py | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/.env.example b/.env.example
index ac7a180..1bc2026 100644
--- a/.env.example
+++ b/.env.example
@@ -29,6 +29,9 @@ LLM_FALLBACK_MODEL=fyn-qwen35-chat
 #LLM_STAGE5_MODEL=fyn-qwen35-thinking
 #LLM_STAGE5_MODALITY=thinking
 
+# Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
+LLM_MAX_TOKENS=16384
+
 # Embedding endpoint (Ollama container in the compose stack)
 EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
 EMBEDDING_MODEL=nomic-embed-text
diff --git a/backend/config.py b/backend/config.py
index 8a2b9a9..db37b87 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -43,6 +43,9 @@ class Settings(BaseSettings):
     llm_stage5_model: str | None = None  # synthesis — thinking model recommended
     llm_stage5_modality: str = "chat"
 
+    # Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
+    llm_max_tokens: int = 16384
+
     # Embedding endpoint
     embedding_api_url: str = "http://localhost:11434/v1"
     embedding_model: str = "nomic-embed-text"
diff --git a/backend/pipeline/llm_client.py b/backend/pipeline/llm_client.py
index 3c6671a..3ecf1df 100644
--- a/backend/pipeline/llm_client.py
+++ b/backend/pipeline/llm_client.py
@@ -136,6 +136,7 @@ class LLMClient:
         response = self._primary.chat.completions.create(
             model=primary_model,
             messages=messages,
+            max_tokens=self.settings.llm_max_tokens,
             **kwargs,
         )
         raw = response.choices[0].message.content or ""
@@ -156,6 +157,7 @@
         response = self._fallback.chat.completions.create(
             model=fallback_model,
             messages=messages,
+            max_tokens=self.settings.llm_max_tokens,
             **kwargs,
         )
         raw = response.choices[0].message.content or ""