From 3cbb6146548cf395bcd76b7b739ca213ab625fa2 Mon Sep 17 00:00:00 2001 From: jlightner Date: Sat, 4 Apr 2026 14:45:09 +0000 Subject: [PATCH] =?UTF-8?q?test:=20Rewrote=20=5FSYSTEM=5FPROMPT=5FTEMPLATE?= =?UTF-8?q?=20with=20citation=20density=20rules,=20resp=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/chat_service.py" GSD-Task: S09/T02 --- .gsd/milestones/M025/slices/S09/S09-PLAN.md | 2 +- .../M025/slices/S09/tasks/T01-VERIFY.json | 16 ++++ .../M025/slices/S09/tasks/T02-SUMMARY.md | 74 +++++++++++++++++++ backend/chat_service.py | 20 ++++- 4 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 .gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json create mode 100644 .gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md diff --git a/.gsd/milestones/M025/slices/S09/S09-PLAN.md b/.gsd/milestones/M025/slices/S09/S09-PLAN.md index b2f5ad0..05ad6ba 100644 --- a/.gsd/milestones/M025/slices/S09/S09-PLAN.md +++ b/.gsd/milestones/M025/slices/S09/S09-PLAN.md @@ -20,7 +20,7 @@ Steps: - Estimate: 2h - Files: backend/pipeline/quality/chat_scorer.py, backend/pipeline/quality/chat_eval.py, backend/pipeline/quality/fixtures/chat_test_suite.yaml, backend/pipeline/quality/__main__.py - Verify: cd backend && python -c 'from pipeline.quality.chat_scorer import ChatScoreRunner, ChatScoreResult; from pipeline.quality.chat_eval import ChatEvalRunner; print("OK")' -- [ ] **T02: Refine chat system prompt and verify no test regressions** — Improve the `_SYSTEM_PROMPT_TEMPLATE` in `backend/chat_service.py` based on the gaps identified in research: the current prompt is 5 lines with no guidance on citation density, response structure, domain awareness, conflicting source handling, or response length. +- [x] **T02: Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged** — Improve the `_SYSTEM_PROMPT_TEMPLATE` in `backend/chat_service.py` based on the gaps identified in research: the current prompt is 5 lines with no guidance on citation density, response structure, domain awareness, conflicting source handling, or response length. The refined prompt should: - Guide citation density: cite every factual claim, prefer inline citations [N] immediately after the claim diff --git a/.gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json b/.gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json new file mode 100644 index 0000000..7acc9cc --- /dev/null +++ b/.gsd/milestones/M025/slices/S09/tasks/T01-VERIFY.json @@ -0,0 +1,16 @@ +{ + "schemaVersion": 1, + "taskId": "T01", + "unitId": "M025/S09/T01", + "timestamp": 1775313832904, + "passed": true, + "discoverySource": "task-plan", + "checks": [ + { + "command": "cd backend", + "exitCode": 0, + "durationMs": 14, + "verdict": "pass" + } + ] +} diff --git a/.gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md b/.gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md new file mode 100644 index 0000000..19c935a --- /dev/null +++ b/.gsd/milestones/M025/slices/S09/tasks/T02-SUMMARY.md @@ -0,0 +1,74 @@ +--- +id: T02 +parent: S09 +milestone: M025 +provides: [] +requires: [] +affects: [] +key_files: ["backend/chat_service.py"] +key_decisions: ["Kept prompt under 20 lines using markdown headers for structure rather than prose paragraphs"] +patterns_established: [] +drill_down_paths: [] +observability_surfaces: [] +duration: "" +verification_result: "cd backend && python -m pytest tests/test_chat.py -v — 26 passed in 1.37s" +completed_at: 2026-04-04T14:45:01.092Z +blocker_discovered: false +--- + +# T02: Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged + +> Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged + +## What Happened +--- +id: T02 +parent: S09 +milestone: M025 +key_files: + - backend/chat_service.py +key_decisions: + - Kept prompt under 20 lines using markdown headers for structure rather than prose paragraphs +duration: "" +verification_result: passed +completed_at: 2026-04-04T14:45:01.092Z +blocker_discovered: false +--- + +# T02: Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged + +**Rewrote _SYSTEM_PROMPT_TEMPLATE with citation density rules, response structure guidance, domain-aware terminology handling, and conflicting-source instructions — all 26 chat tests pass unchanged** + +## What Happened + +Replaced the 5-line system prompt with a structured prompt addressing citation density, response format, domain terminology, conflicting source handling, and response length. No test changes needed — all 26 tests verify behavioral properties, not prompt wording. + +## Verification + +cd backend && python -m pytest tests/test_chat.py -v — 26 passed in 1.37s + +## Verification Evidence + +| # | Command | Exit Code | Verdict | Duration | +|---|---------|-----------|---------|----------| +| 1 | `cd backend && python -m pytest tests/test_chat.py -v` | 0 | ✅ pass | 1370ms | + + +## Deviations + +None. + +## Known Issues + +None. + +## Files Created/Modified + +- `backend/chat_service.py` + + +## Deviations +None. + +## Known Issues +None. diff --git a/backend/chat_service.py b/backend/chat_service.py index ca68013..390d17a 100644 --- a/backend/chat_service.py +++ b/backend/chat_service.py @@ -31,10 +31,22 @@ from search_service import SearchService logger = logging.getLogger("chrysopedia.chat") _SYSTEM_PROMPT_TEMPLATE = """\ -You are Chrysopedia, an expert encyclopedic assistant for music production techniques. -Answer the user's question using ONLY the numbered sources below. Cite sources by -writing [N] inline (e.g. [1], [2]) where N is the source number. If the sources -do not contain enough information, say so honestly — do not invent facts. +You are Chrysopedia, an expert assistant for music production techniques — \ +synthesis, sound design, mixing, sampling, and audio processing. + +## Rules +- Use ONLY the numbered sources below. Do not invent facts. +- Cite every factual claim inline with [N] immediately after the claim \ +(e.g. "Parallel compression adds sustain [2] while preserving transients [1]."). +- When sources disagree, present both perspectives with their citations. +- If the sources lack enough information, say so honestly. + +## Response format +- Aim for 2–4 short paragraphs. Expand only when the question warrants detail. +- Use bullet lists for steps, signal chains, or parameter lists. +- **Bold** key terms on first mention. +- Use audio/synthesis/mixing terminology naturally — do not over-explain \ +standard concepts (e.g. LFO, sidechain, wet/dry) unless the user asks. Sources: {context_block}