feat: Switch to FYN-LLM-Agent models — chat for stages 2/4, think for stages 3/5

This commit is contained in:
jlightner 2026-03-30 05:42:27 +00:00
parent dfaf0481fe
commit 17347da87e
2 changed files with 19 additions and 19 deletions

View file

@@ -12,22 +12,22 @@ REDIS_URL=redis://chrysopedia-redis:6379/0
# LLM endpoint (OpenAI-compatible — OpenWebUI on FYN DGX)
LLM_API_URL=https://chat.forgetyour.name/api/v1
LLM_API_KEY=sk-changeme
LLM_MODEL=FYN-QWEN35
LLM_MODEL=fyn-llm-agent-chat
LLM_FALLBACK_URL=https://chat.forgetyour.name/api/v1
LLM_FALLBACK_MODEL=fyn-qwen35-chat
LLM_FALLBACK_MODEL=fyn-llm-agent-chat
# Per-stage LLM model overrides (optional — defaults to LLM_MODEL)
# Modality: "chat" = standard JSON mode, "thinking" = reasoning model (strips <think> tags)
# Stages 2 (segmentation) and 4 (classification) are mechanical — use fast chat model
# Stages 3 (extraction) and 5 (synthesis) need reasoning — use thinking model
#LLM_STAGE2_MODEL=fyn-qwen35-chat
#LLM_STAGE2_MODALITY=chat
#LLM_STAGE3_MODEL=fyn-qwen35-thinking
#LLM_STAGE3_MODALITY=thinking
#LLM_STAGE4_MODEL=fyn-qwen35-chat
#LLM_STAGE4_MODALITY=chat
#LLM_STAGE5_MODEL=fyn-qwen35-thinking
#LLM_STAGE5_MODALITY=thinking
LLM_STAGE2_MODEL=fyn-llm-agent-chat
LLM_STAGE2_MODALITY=chat
LLM_STAGE3_MODEL=fyn-llm-agent-think
LLM_STAGE3_MODALITY=thinking
LLM_STAGE4_MODEL=fyn-llm-agent-chat
LLM_STAGE4_MODALITY=chat
LLM_STAGE5_MODEL=fyn-llm-agent-think
LLM_STAGE5_MODALITY=thinking
# Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more)
LLM_MAX_TOKENS=65536

View file

@@ -29,19 +29,19 @@ class Settings(BaseSettings):
# LLM endpoint (OpenAI-compatible)
llm_api_url: str = "http://localhost:11434/v1"
llm_api_key: str = "sk-placeholder"
llm_model: str = "qwen2.5:14b-q8_0"
llm_model: str = "fyn-llm-agent-chat"
llm_fallback_url: str = "http://localhost:11434/v1"
llm_fallback_model: str = "qwen2.5:14b-q8_0"
llm_fallback_model: str = "fyn-llm-agent-chat"
# Per-stage model overrides (optional — falls back to llm_model / "chat")
llm_stage2_model: str | None = None # segmentation — fast chat model recommended
llm_stage2_modality: str = "chat" # "chat" or "thinking"
llm_stage3_model: str | None = None # extraction — thinking model recommended
llm_stage3_modality: str = "chat"
llm_stage4_model: str | None = None # classification — fast chat model recommended
llm_stage2_model: str | None = "fyn-llm-agent-chat" # segmentation — mechanical, fast chat
llm_stage2_modality: str = "chat"
llm_stage3_model: str | None = "fyn-llm-agent-think" # extraction — reasoning
llm_stage3_modality: str = "thinking"
llm_stage4_model: str | None = "fyn-llm-agent-chat" # classification — mechanical, fast chat
llm_stage4_modality: str = "chat"
llm_stage5_model: str | None = None # synthesis — thinking model recommended
llm_stage5_modality: str = "chat"
llm_stage5_model: str | None = "fyn-llm-agent-think" # synthesis — reasoning
llm_stage5_modality: str = "thinking"
# Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON)
llm_max_tokens: int = 65536