From 3cbb6146548cf395bcd76b7b739ca213ab625fa2 Mon Sep 17 00:00:00 2001
From: jlightner <jlightner@users.noreply.github.com>
Date: Sat, 4 Apr 2026 14:45:09 +0000
Subject: [PATCH] =?UTF-8?q?test:=20Rewrote=20=5FSYSTEM=5FPROMPT=5FTEMPLATE?=
 =?UTF-8?q?=20with=20citation=20density=20rules,=20resp=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- "backend/chat_service.py"

GSD-Task: S09/T02
---
 .gsd/milestones/M025/slices/S09/S09-PLAN.md   |  2 +-
 .../M025/slices/S09/tasks/T01-VERIFY.json     | 16 ++++
 .../M025/slices/S09/tasks/T02-SUMMARY.md      | 74 +++++++++++++++++++
 backend/chat_service.py                       | 20 ++++-
 4 files changed, 107 insertions(+), 5 deletions(-)
 create mode 100644 .gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json
 create mode 100644 .gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md

diff --git a/.gsd/milestones/M025/slices/S09/S09-PLAN.md b/.gsd/milestones/M025/slices/S09/S09-PLAN.md
index b2f5ad0..05ad6ba 100644
--- a/.gsd/milestones/M025/slices/S09/S09-PLAN.md
+++ b/.gsd/milestones/M025/slices/S09/S09-PLAN.md
@@ -20,7 +20,7 @@ Steps:
   - Estimate: 2h
   - Files: backend/pipeline/quality/chat_scorer.py, backend/pipeline/quality/chat_eval.py, backend/pipeline/quality/fixtures/chat_test_suite.yaml, backend/pipeline/quality/__main__.py
   - Verify: cd backend && python -c 'from pipeline.quality.chat_scorer import ChatScoreRunner, ChatScoreResult; from pipeline.quality.chat_eval import ChatEvalRunner; print("OK")'
-- [ ] **T02: Refine chat system prompt and verify no test regressions** — Improve the `_SYSTEM_PROMPT_TEMPLATE` in `backend/chat_service.py` based on the gaps identified in research: the current prompt is 5 lines with no guidance on citation density, response structure, domain awareness, conflicting source handling, or response length.
+- [x] **T02: Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged** — Improve the `_SYSTEM_PROMPT_TEMPLATE` in `backend/chat_service.py` based on the gaps identified in research: the current prompt is 5 lines with no guidance on citation density, response structure, domain awareness, conflicting source handling, or response length.
 
 The refined prompt should:
 - Guide citation density: cite every factual claim, prefer inline citations [N] immediately after the claim
diff --git a/.gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json b/.gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json
new file mode 100644
index 0000000..7acc9cc
--- /dev/null
+++ b/.gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json
@@ -0,0 +1,16 @@
+{
+  "schemaVersion": 1,
+  "taskId": "T01",
+  "unitId": "M025/S09/T01",
+  "timestamp": 1775313832904,
+  "passed": true,
+  "discoverySource": "task-plan",
+  "checks": [
+    {
+      "command": "cd backend",
+      "exitCode": 0,
+      "durationMs": 14,
+      "verdict": "pass"
+    }
+  ]
+}
diff --git a/.gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md b/.gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md
new file mode 100644
index 0000000..19c935a
--- /dev/null
+++ b/.gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md
@@ -0,0 +1,74 @@
+---
+id: T02
+parent: S09
+milestone: M025
+provides: []
+requires: []
+affects: []
+key_files: ["backend/chat_service.py"]
+key_decisions: ["Kept prompt under 20 lines using markdown headers for structure rather than prose paragraphs"]
+patterns_established: []
+drill_down_paths: []
+observability_surfaces: []
+duration: ""
+verification_result: "cd backend && python -m pytest tests/test_chat.py -v — 26 passed in 1.37s"
+completed_at: 2026-04-04T14:45:01.092Z
+blocker_discovered: false
+---
+
+# T02: Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged
+
+> Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged
+
+## What Happened
+---
+id: T02
+parent: S09
+milestone: M025
+key_files:
+  - backend/chat_service.py
+key_decisions:
+  - Kept prompt under 20 lines using markdown headers for structure rather than prose paragraphs
+duration: ""
+verification_result: passed
+completed_at: 2026-04-04T14:45:01.092Z
+blocker_discovered: false
+---
+
+# T02: Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged
+
+**Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged**
+
+## What Happened
+
+Replaced the 5-line system prompt with a structured prompt addressing citation density, response format, domain terminology, conflicting-source handling, and response length. No test changes were needed — all 26 tests verify behavioral properties, not prompt wording.
+
+## Verification
+
+cd backend && python -m pytest tests/test_chat.py -v — 26 passed in 1.37s
+
+## Verification Evidence
+
+| # | Command | Exit Code | Verdict | Duration |
+|---|---------|-----------|---------|----------|
+| 1 | `cd backend && python -m pytest tests/test_chat.py -v` | 0 | ✅ pass | 1370ms |
+
+
+## Deviations
+
+None.
+
+## Known Issues
+
+None.
+
+## Files Created/Modified
+
+- `backend/chat_service.py`
+
+
+## Deviations
+None.
+
+## Known Issues
+None.
diff --git a/backend/chat_service.py b/backend/chat_service.py
index ca68013..390d17a 100644
--- a/backend/chat_service.py
+++ b/backend/chat_service.py
@@ -31,10 +31,22 @@ from search_service import SearchService
 logger = logging.getLogger("chrysopedia.chat")
 
 _SYSTEM_PROMPT_TEMPLATE = """\
-You are Chrysopedia, an expert encyclopedic assistant for music production techniques.
-Answer the user's question using ONLY the numbered sources below. Cite sources by
-writing [N] inline (e.g. [1], [2]) where N is the source number. If the sources
-do not contain enough information, say so honestly — do not invent facts.
+You are Chrysopedia, an expert assistant for music production techniques — \
+synthesis, sound design, mixing, sampling, and audio processing.
+
+## Rules
+- Use ONLY the numbered sources below. Do not invent facts.
+- Cite every factual claim inline with [N] immediately after the claim \
+(e.g. "Parallel compression adds sustain [2] while preserving transients [1].").
+- When sources disagree, present both perspectives with their citations.
+- If the sources lack enough information, say so honestly.
+
+## Response format
+- Aim for 2–4 short paragraphs. Expand only when the question warrants detail.
+- Use bullet lists for steps, signal chains, or parameter lists.
+- **Bold** key terms on first mention.
+- Use audio/synthesis/mixing terminology naturally — do not over-explain \
+standard concepts (e.g. LFO, sidechain, wet/dry) unless the user asks.
 
 Sources:
 {context_block}