From 9bb40f57e41ac4c8e433f4b8540c0c58ac0f270c Mon Sep 17 00:00:00 2001
From: jlightner <jlightner@users.noreply.github.com>
Date: Mon, 30 Mar 2026 04:22:45 +0000
Subject: [PATCH] chore: Bump LLM max_tokens to 32768, commit M002/M003 GSD
 artifacts

- LLM_MAX_TOKENS (.env.example) and the llm_max_tokens default (backend/config.py) bumped from 16384 to 32768 (extraction responses were still hitting the token limit)
- All GSD planning/completion artifacts for M002 (deployment) and M003 (DNS + LLM routing)
- KNOWLEDGE.md updated with XPLTD domain setup flow and container healthcheck patterns
- DECISIONS.md updated with D015 (subnet) and D016 (Ollama for embeddings)
---
 .env.example      | 2 +-
 backend/config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.env.example b/.env.example
index 1bc2026..9fad03b 100644
--- a/.env.example
+++ b/.env.example
@@ -30,7 +30,7 @@ LLM_FALLBACK_MODEL=fyn-qwen35-chat
 #LLM_STAGE5_MODALITY=thinking
 
 # Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
-LLM_MAX_TOKENS=16384
+LLM_MAX_TOKENS=32768
 
 # Embedding endpoint (Ollama container in the compose stack)
 EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
diff --git a/backend/config.py b/backend/config.py
index db37b87..5ba5446 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -44,7 +44,7 @@ class Settings(BaseSettings):
     llm_stage5_modality: str = "chat"
 
     # Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
-    llm_max_tokens: int = 16384
+    llm_max_tokens: int = 32768
 
     # Embedding endpoint
     embedding_api_url: str = "http://localhost:11434/v1"