From 17347da87ef9b454819ef5de85bb00cfea6ddc85 Mon Sep 17 00:00:00 2001 From: jlightner Date: Mon, 30 Mar 2026 05:42:27 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20Switch=20to=20FYN-LLM-Agent=20models=20?= =?UTF-8?q?=E2=80=94=20chat=20for=20stages=202/4,=20think=20for=20stages?= =?UTF-8?q?=203/5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 20 ++++++++++---------- backend/config.py | 18 +++++++++--------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.env.example b/.env.example index 7e9ca94..75e0d0b 100644 --- a/.env.example +++ b/.env.example @@ -12,22 +12,22 @@ REDIS_URL=redis://chrysopedia-redis:6379/0 # LLM endpoint (OpenAI-compatible — OpenWebUI on FYN DGX) LLM_API_URL=https://chat.forgetyour.name/api/v1 LLM_API_KEY=sk-changeme -LLM_MODEL=FYN-QWEN35 +LLM_MODEL=fyn-llm-agent-chat LLM_FALLBACK_URL=https://chat.forgetyour.name/api/v1 -LLM_FALLBACK_MODEL=fyn-qwen35-chat +LLM_FALLBACK_MODEL=fyn-llm-agent-chat # Per-stage LLM model overrides (optional — defaults to LLM_MODEL) # Modality: "chat" = standard JSON mode, "thinking" = reasoning model (strips tags) # Stages 2 (segmentation) and 4 (classification) are mechanical — use fast chat model # Stages 3 (extraction) and 5 (synthesis) need reasoning — use thinking model -#LLM_STAGE2_MODEL=fyn-qwen35-chat -#LLM_STAGE2_MODALITY=chat -#LLM_STAGE3_MODEL=fyn-qwen35-thinking -#LLM_STAGE3_MODALITY=thinking -#LLM_STAGE4_MODEL=fyn-qwen35-chat -#LLM_STAGE4_MODALITY=chat -#LLM_STAGE5_MODEL=fyn-qwen35-thinking -#LLM_STAGE5_MODALITY=thinking +LLM_STAGE2_MODEL=fyn-llm-agent-chat +LLM_STAGE2_MODALITY=chat +LLM_STAGE3_MODEL=fyn-llm-agent-think +LLM_STAGE3_MODALITY=thinking +LLM_STAGE4_MODEL=fyn-llm-agent-chat +LLM_STAGE4_MODALITY=chat +LLM_STAGE5_MODEL=fyn-llm-agent-think +LLM_STAGE5_MODALITY=thinking # Max tokens for LLM responses (OpenWebUI defaults to 1000 — pipeline needs much more) LLM_MAX_TOKENS=65536 diff --git a/backend/config.py b/backend/config.py index 705580d..d6e2b6f 100644 --- a/backend/config.py +++ b/backend/config.py @@ -29,19 +29,19 @@ class Settings(BaseSettings): # LLM endpoint (OpenAI-compatible) llm_api_url: str = "http://localhost:11434/v1" llm_api_key: str = "sk-placeholder" - llm_model: str = "qwen2.5:14b-q8_0" + llm_model: str = "fyn-llm-agent-chat" llm_fallback_url: str = "http://localhost:11434/v1" - llm_fallback_model: str = "qwen2.5:14b-q8_0" + llm_fallback_model: str = "fyn-llm-agent-chat" # Per-stage model overrides (optional — falls back to llm_model / "chat") - llm_stage2_model: str | None = None # segmentation — fast chat model recommended - llm_stage2_modality: str = "chat" # "chat" or "thinking" - llm_stage3_model: str | None = None # extraction — thinking model recommended - llm_stage3_modality: str = "chat" - llm_stage4_model: str | None = None # classification — fast chat model recommended + llm_stage2_model: str | None = "fyn-llm-agent-chat" # segmentation — mechanical, fast chat + llm_stage2_modality: str = "chat" + llm_stage3_model: str | None = "fyn-llm-agent-think" # extraction — reasoning + llm_stage3_modality: str = "thinking" + llm_stage4_model: str | None = "fyn-llm-agent-chat" # classification — mechanical, fast chat llm_stage4_modality: str = "chat" - llm_stage5_model: str | None = None # synthesis — thinking model recommended - llm_stage5_modality: str = "chat" + llm_stage5_model: str | None = "fyn-llm-agent-think" # synthesis — reasoning + llm_stage5_modality: str = "thinking" # Max tokens for LLM responses (OpenWebUI defaults to 1000 which truncates pipeline JSON) llm_max_tokens: int = 65536