- ".gsd/milestones/M025/slices/S09/S09-QUALITY-REPORT.md" - "backend/pipeline/quality/results/chat_eval_baseline.json" GSD-Task: S09/T03
91 lines
4.2 KiB
JSON
91 lines
4.2 KiB
JSON
{
|
|
"timestamp": "20260404_043200",
|
|
"evaluation_method": "manual_curl",
|
|
"llm_status": "unavailable (upstream 502 Bad Gateway at chat.forgetyour.name)",
|
|
"api_health": "ok",
|
|
"total_queries": 6,
|
|
"scored_queries": 0,
|
|
"errors_llm": 6,
|
|
"note": "LLM completions unavailable — only source retrieval quality assessed. Re-run with automated eval when LLM proxy is restored.",
|
|
"source_retrieval_results": [
|
|
{
|
|
"query": "How do I set up sidechain compression on a bass synth using a kick drum as the trigger?",
|
|
"creator": null,
|
|
"personality_weight": 0.0,
|
|
"category": "technical",
|
|
"source_count": 10,
|
|
"unique_creators": ["Break", "Caracal Project, The", "Chee", "KOAN Sound"],
|
|
"creator_distribution": {"Break": 3, "Caracal Project, The": 2, "Chee": 2, "KOAN Sound": 1},
|
|
"relevance_assessment": "highly_relevant",
|
|
"notes": "All 10 sources directly about sidechain compression. Good creator diversity."
|
|
},
|
|
{
|
|
"query": "What are the different approaches to layering synth sounds across creators?",
|
|
"creator": null,
|
|
"personality_weight": 0.0,
|
|
"category": "cross_creator",
|
|
"source_count": 10,
|
|
"unique_creators": ["Chee", "COPYCATT", "Caracal Project, The", "Current Value", "Emperor"],
|
|
"creator_distribution": {"Chee": 5, "COPYCATT": 2, "Caracal Project, The": 1, "Current Value": 1, "Emperor": 1},
|
|
"relevance_assessment": "relevant_but_skewed",
|
|
"notes": "50% of sources from Chee — cross-creator diversity could be improved."
|
|
},
|
|
{
|
|
"query": "How does this creator approach sound design for bass sounds?",
|
|
"creator": "Keota",
|
|
"personality_weight": 0.0,
|
|
"category": "creator_encyclopedic",
|
|
"source_count": 10,
|
|
"unique_creators": ["COPYCATT", "Break", "Chee", "Caracal Project, The"],
|
|
"creator_distribution": {"COPYCATT": 2, "Break": 2, "Chee": 3, "Caracal Project, The": 3},
|
|
"relevance_assessment": "creator_scope_failure",
|
|
"notes": "Zero sources from Keota despite creator-scoped query. Cascade fell through to global tier."
|
|
},
|
|
{
|
|
"query": "What mixing techniques does this creator recommend for achieving width in a mix?",
|
|
"creator": "Mr. Bill",
|
|
"personality_weight": 0.0,
|
|
"category": "creator_encyclopedic",
|
|
"source_count": 10,
|
|
"unique_creators": ["Break", "Frequent", "Caracal Project, The", "COPYCATT", "Chee"],
|
|
"creator_distribution": {"Break": 2, "Frequent": 1, "Caracal Project, The": 2, "COPYCATT": 2, "Chee": 3},
|
|
"relevance_assessment": "creator_scope_failure",
|
|
"notes": "Zero sources from Mr. Bill despite creator-scoped query."
|
|
},
|
|
{
|
|
"query": "How does this creator approach sound design for bass sounds? (personality)",
|
|
"creator": "Keota",
|
|
"personality_weight": 0.7,
|
|
"category": "creator_personality",
|
|
"source_count": 10,
|
|
"personality_profile_exists": false,
|
|
"notes": "Personality weight=0.7 accepted but no profile data exists — falls back to encyclopedic mode silently."
|
|
},
|
|
{
|
|
"query": "What mixing techniques does this creator recommend for width? (personality)",
|
|
"creator": "Mr. Bill",
|
|
"personality_weight": 0.7,
|
|
"category": "creator_personality",
|
|
"source_count": 10,
|
|
"personality_profile_exists": false,
|
|
"notes": "Personality weight=0.7 accepted but no profile data exists — falls back to encyclopedic mode silently."
|
|
}
|
|
],
|
|
"personality_profiles_status": {
|
|
"total_creators": 25,
|
|
"creators_with_profile": 0,
|
|
"assessment": "No personality profiles populated. The 5-tier progressive injection system is architecturally complete (26 unit tests pass) but functionally inert on the live system."
|
|
},
|
|
"prompt_changes": {
|
|
"before_lines": 4,
|
|
"after_lines": 18,
|
|
"changes": [
|
|
"Added structured citation guidance with inline example",
|
|
"Added response format section (2-4 paragraphs, bullet lists, bold terms)",
|
|
"Added domain awareness (music production subdomain list)",
|
|
"Added conflicting source handling instruction",
|
|
"Added response length guidance"
|
|
],
|
|
"test_impact": "Zero test modifications needed — all 26 tests pass unchanged"
|
|
}
|
|
}
|