fix: Add max_tokens=16384 to LLM requests (OpenWebUI defaults to 1000, truncating pipeline JSON)
This commit is contained in:
parent
8e96fae64f
commit
cf759f3739
3 changed files with 8 additions and 0 deletions
|
|
@@ -29,6 +29,9 @@ LLM_FALLBACK_MODEL=fyn-qwen35-chat
|
||||||
#LLM_STAGE5_MODEL=fyn-qwen35-thinking
|
#LLM_STAGE5_MODEL=fyn-qwen35-thinking
|
||||||
#LLM_STAGE5_MODALITY=thinking
|
#LLM_STAGE5_MODALITY=thinking
|
||||||
|
|
||||||
|
# Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
|
||||||
|
LLM_MAX_TOKENS=16384
|
||||||
|
|
||||||
# Embedding endpoint (Ollama container in the compose stack)
|
# Embedding endpoint (Ollama container in the compose stack)
|
||||||
EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
|
EMBEDDING_API_URL=http://chrysopedia-ollama:11434/v1
|
||||||
EMBEDDING_MODEL=nomic-embed-text
|
EMBEDDING_MODEL=nomic-embed-text
|
||||||
|
|
|
||||||
|
|
@@ -43,6 +43,9 @@ class Settings(BaseSettings):
|
||||||
llm_stage5_model: str | None = None # synthesis — thinking model recommended
|
llm_stage5_model: str | None = None # synthesis — thinking model recommended
|
||||||
llm_stage5_modality: str = "chat"
|
llm_stage5_modality: str = "chat"
|
||||||
|
|
||||||
|
# Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
|
||||||
|
llm_max_tokens: int = 16384
|
||||||
|
|
||||||
# Embedding endpoint
|
# Embedding endpoint
|
||||||
embedding_api_url: str = "http://localhost:11434/v1"
|
embedding_api_url: str = "http://localhost:11434/v1"
|
||||||
embedding_model: str = "nomic-embed-text"
|
embedding_model: str = "nomic-embed-text"
|
||||||
|
|
|
||||||
|
|
@@ -136,6 +136,7 @@ class LLMClient:
|
||||||
response = self._primary.chat.completions.create(
|
response = self._primary.chat.completions.create(
|
||||||
model=primary_model,
|
model=primary_model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
max_tokens=self.settings.llm_max_tokens,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
raw = response.choices[0].message.content or ""
|
raw = response.choices[0].message.content or ""
|
||||||
|
|
@@ -156,6 +157,7 @@ class LLMClient:
|
||||||
response = self._fallback.chat.completions.create(
|
response = self._fallback.chat.completions.create(
|
||||||
model=fallback_model,
|
model=fallback_model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
max_tokens=self.settings.llm_max_tokens,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
raw = response.choices[0].message.content or ""
|
raw = response.choices[0].message.content or ""
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue