chore: Bump LLM max_tokens to 32768, commit M002/M003 GSD artifacts
- max_tokens bumped from 16384 to 32768 (extraction responses were still hitting the limit)
- All GSD planning/completion artifacts for M002 (deployment) and M003 (DNS + LLM routing)
- KNOWLEDGE.md updated with the XPLTD domain setup flow and container healthcheck patterns
- DECISIONS.md updated with D015 (subnet) and D016 (Ollama for embeddings)
This commit is contained in:
parent
e09147760d
commit
9bb40f57e4
2 changed files with 2 additions and 2 deletions
|
|
@@ -30,7 +30,7 @@ LLM_FALLBACK_MODEL=fyn-qwen35-chat
|
||||||
#LLM_STAGE5_MODALITY=thinking
|
#LLM_STAGE5_MODALITY=thinking
|
||||||
|
|
||||||
# Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
|
# Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
|
||||||
LLM_MAX_TOKENS=16384
|
LLM_MAX_TOKENS=32768
|
||||||
|
|
||||||
# Embedding endpoint (Ollama container in the compose stack)
|
# Embedding endpoint (Ollama container in the compose stack)
|
||||||
EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
|
EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
|
||||||
|
|
|
||||||
|
|
@@ -44,7 +44,7 @@ class Settings(BaseSettings):
|
||||||
llm_stage5_modality: str = "chat"
|
llm_stage5_modality: str = "chat"
|
||||||
|
|
||||||
# Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
|
# Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
|
||||||
llm_max_tokens: int = 16384
|
llm_max_tokens: int = 32768
|
||||||
|
|
||||||
# Embedding endpoint
|
# Embedding endpoint
|
||||||
embedding_api_url: str = "http://localhost:11434/v1"
|
embedding_api_url: str = "http://localhost:11434/v1"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue