From dfaf0481fe14ee3ea61523716b27f2c96a061bbf Mon Sep 17 00:00:00 2001
From: jlightner
Date: Mon, 30 Mar 2026 05:37:21 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20Reduce=20Celery=20worker=20concurrency?=
 =?UTF-8?q?=20from=202=20to=201=20=E2=80=94=20concurrent=20LLM=20requests?=
 =?UTF-8?q?=20cause=20empty=20responses?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Qwen 3.5 397B (quantized) returns empty content when handling two
large-context extraction requests simultaneously, likely due to vLLM
memory pressure. Sequential processing eliminates this failure mode.
---
 docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index eef5c10..4ea235d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -125,7 +125,7 @@ services:
       QDRANT_URL: http://chrysopedia-qdrant:6333
       EMBEDDING_API_URL: http://chrysopedia-ollama:11434/v1
       PROMPTS_PATH: /prompts
-    command: ["celery", "-A", "worker", "worker", "--loglevel=info", "--concurrency=2"]
+    command: ["celery", "-A", "worker", "worker", "--loglevel=info", "--concurrency=1"]
     healthcheck:
       test: ["CMD-SHELL", "celery -A worker inspect ping --timeout=5 2>/dev/null | grep -q pong || exit 1"]
       interval: 30s