- "backend/pipeline/quality/chat_scorer.py" - "backend/pipeline/quality/chat_eval.py" - "backend/pipeline/quality/fixtures/chat_test_suite.yaml" - "backend/pipeline/quality/__main__.py" GSD-Task: S09/T01
72 lines
3.7 KiB
YAML
72 lines
3.7 KiB
YAML
# Chat quality evaluation test suite
|
|
# 10 representative queries across 4 groups (5 distinct `category` values):
|
|
# - technical: How-to questions about specific production techniques
|
|
# - conceptual: Broader understanding questions about audio concepts
|
|
# - creator: Creator-scoped queries at different personality weights (category: creator_encyclopedic at weight 0.0, creator_personality at weight 0.7)
|
|
# - cross_creator: Queries spanning multiple creators' approaches
|
|
|
|
queries:
|
|
# ── Technical how-to (2) ────────────────────────────────────────────
|
|
- query: "How do I set up sidechain compression on a bass synth using a kick drum as the trigger?"
|
|
creator: null
|
|
personality_weight: 0.0
|
|
category: technical
|
|
description: "Common sidechain compression setup — expects specific settings (ratio, attack, release)"
|
|
|
|
- query: "What are the best EQ settings for cleaning up a muddy vocal recording?"
|
|
creator: null
|
|
personality_weight: 0.0
|
|
category: technical
|
|
description: "Vocal EQ technique — expects frequency ranges, Q values, cut/boost guidance"
|
|
|
|
# ── Conceptual (2) ─────────────────────────────────────────────────
|
|
- query: "What is the difference between parallel compression and serial compression, and when should I use each?"
|
|
creator: null
|
|
personality_weight: 0.0
|
|
category: conceptual
|
|
description: "Conceptual comparison — expects clear definitions, use cases, pros/cons"
|
|
|
|
- query: "How does sample rate affect sound quality in music production?"
|
|
creator: null
|
|
personality_weight: 0.0
|
|
category: conceptual
|
|
description: "Audio fundamentals — expects Nyquist, aliasing, practical guidance"
|
|
|
|
# ── Creator-specific: encyclopedic (2) ──────────────────────────────
|
|
- query: "How does this creator approach sound design for bass sounds?"
|
|
creator: "KEOTA"
|
|
personality_weight: 0.0
|
|
category: creator_encyclopedic
|
|
description: "Creator-scoped query at weight=0 — should be neutral/encyclopedic about KEOTA's techniques"
|
|
|
|
- query: "What mixing techniques does this creator recommend for achieving width in a mix?"
|
|
creator: "Mr. Bill"
|
|
personality_weight: 0.0
|
|
category: creator_encyclopedic
|
|
description: "Creator-scoped query at weight=0 — neutral tone about Mr. Bill's approach"
|
|
|
|
# ── Creator-specific: personality (2) ───────────────────────────────
|
|
- query: "How does this creator approach sound design for bass sounds?"
|
|
creator: "KEOTA"
|
|
personality_weight: 0.7
|
|
category: creator_personality
|
|
description: "Same query as above but at weight=0.7 — should reflect KEOTA's voice and teaching style"
|
|
|
|
- query: "What mixing techniques does this creator recommend for achieving width in a mix?"
|
|
creator: "Mr. Bill"
|
|
personality_weight: 0.7
|
|
category: creator_personality
|
|
description: "Same query as above but at weight=0.7 — should reflect Mr. Bill's voice"
|
|
|
|
# ── Cross-creator (2) ──────────────────────────────────────────────
|
|
- query: "What are the different approaches to layering synth sounds across creators?"
|
|
creator: null
|
|
personality_weight: 0.0
|
|
category: cross_creator
|
|
description: "Cross-creator comparison — should cite multiple creators' techniques"
|
|
|
|
- query: "How do different producers approach drum processing and what plugins do they prefer?"
|
|
creator: null
|
|
personality_weight: 0.0
|
|
category: cross_creator
|
|
description: "Cross-creator comparison on drums — expects multiple perspectives with citations"
|