chrysopedia/backend/pipeline/quality/fixtures/chat_test_suite.yaml
jlightner 846db2aad5 test: Created chat-specific LLM-as-judge scorer (5 dimensions), SSE-par…
- "backend/pipeline/quality/chat_scorer.py"
- "backend/pipeline/quality/chat_eval.py"
- "backend/pipeline/quality/fixtures/chat_test_suite.yaml"
- "backend/pipeline/quality/__main__.py"

GSD-Task: S09/T01
2026-04-04 14:43:52 +00:00

72 lines
3.7 KiB
YAML

# Chat quality evaluation test suite
# 10 representative queries across 5 categories (2 queries each):
# - technical: How-to questions about specific production techniques
# - conceptual: Broader understanding questions about audio concepts
# - creator_encyclopedic: Creator-scoped queries at personality_weight 0.0
# - creator_personality: The same creator-scoped queries at personality_weight 0.7
# - cross_creator: Queries spanning multiple creators' approaches
# Each item under `queries` is one evaluation case. Every field must be nested
# under its `- query:` item so that the case parses as a single mapping.
#   query:              the question text for this case
#   creator:            creator the query is scoped to ("KEOTA", "Mr. Bill"),
#                       or null when the query is not scoped to a creator
#   personality_weight: 0.0 for cases expected to be neutral/encyclopedic,
#                       0.7 for cases expected to reflect the creator's voice
#   category:           one of technical, conceptual, creator_encyclopedic,
#                       creator_personality, cross_creator
#   description:        note on what a good answer is expected to contain
queries:
  # ── Technical how-to (2) ────────────────────────────────────────────
  - query: "How do I set up sidechain compression on a bass synth using a kick drum as the trigger?"
    creator: null
    personality_weight: 0.0
    category: technical
    description: "Common sidechain compression setup — expects specific settings (ratio, attack, release)"
  - query: "What are the best EQ settings for cleaning up a muddy vocal recording?"
    creator: null
    personality_weight: 0.0
    category: technical
    description: "Vocal EQ technique — expects frequency ranges, Q values, cut/boost guidance"

  # ── Conceptual (2) ─────────────────────────────────────────────────
  - query: "What is the difference between parallel compression and serial compression, and when should I use each?"
    creator: null
    personality_weight: 0.0
    category: conceptual
    description: "Conceptual comparison — expects clear definitions, use cases, pros/cons"
  - query: "How does sample rate affect sound quality in music production?"
    creator: null
    personality_weight: 0.0
    category: conceptual
    description: "Audio fundamentals — expects Nyquist, aliasing, practical guidance"

  # ── Creator-specific: encyclopedic (2) ──────────────────────────────
  - query: "How does this creator approach sound design for bass sounds?"
    creator: "KEOTA"
    personality_weight: 0.0
    category: creator_encyclopedic
    description: "Creator-scoped query at weight=0 — should be neutral/encyclopedic about KEOTA's techniques"
  - query: "What mixing techniques does this creator recommend for achieving width in a mix?"
    creator: "Mr. Bill"
    personality_weight: 0.0
    category: creator_encyclopedic
    description: "Creator-scoped query at weight=0 — neutral tone about Mr. Bill's approach"

  # ── Creator-specific: personality (2) ───────────────────────────────
  # These two cases repeat the encyclopedic queries verbatim; only
  # personality_weight and category differ.
  - query: "How does this creator approach sound design for bass sounds?"
    creator: "KEOTA"
    personality_weight: 0.7
    category: creator_personality
    description: "Same query as above but at weight=0.7 — should reflect KEOTA's voice and teaching style"
  - query: "What mixing techniques does this creator recommend for achieving width in a mix?"
    creator: "Mr. Bill"
    personality_weight: 0.7
    category: creator_personality
    description: "Same query as above but at weight=0.7 — should reflect Mr. Bill's voice"

  # ── Cross-creator (2) ──────────────────────────────────────────────
  - query: "What are the different approaches to layering synth sounds across creators?"
    creator: null
    personality_weight: 0.0
    category: cross_creator
    description: "Cross-creator comparison — should cite multiple creators' techniques"
  - query: "How do different producers approach drum processing and what plugins do they prefer?"
    creator: null
    personality_weight: 0.0
    category: cross_creator
    description: "Cross-creator comparison on drums — expects multiple perspectives with citations"