chrysopedia/backend/pipeline/quality/__main__.py
jlightner 5223772756 feat: Built ScoreRunner with 5-dimension LLM-as-judge scoring rubric, C…
- "backend/pipeline/quality/scorer.py"
- "backend/pipeline/quality/__main__.py"
- "backend/pipeline/quality/fixtures/sample_moments.json"
- "backend/pipeline/quality/fixtures/__init__.py"

GSD-Task: S02/T01
2026-04-01 08:53:40 +00:00

131 lines
3.6 KiB
Python

"""FYN-LLM quality assurance toolkit.
Subcommands:
fitness — Run LLM fitness tests across four categories
score — Score a Stage 5 technique page across 5 quality dimensions
Run with: python -m pipeline.quality <command>
"""
from __future__ import annotations
import argparse
import json
import sys
from config import get_settings
from pipeline.llm_client import LLMClient
from .fitness import FitnessRunner
from .scorer import ScoreRunner
def main() -> int:
    """CLI entry point: build the argument parser and dispatch the subcommand.

    Returns:
        Process exit code — 0 on success, 1 when no subcommand is given or a
        subcommand fails.
    """
    arg_parser = argparse.ArgumentParser(
        prog="pipeline.quality",
        description="FYN-LLM quality assurance toolkit",
    )
    subcommands = arg_parser.add_subparsers(dest="command")

    # -- fitness subcommand --
    subcommands.add_parser("fitness", help="Run LLM fitness tests across four categories")

    # -- score subcommand --
    score_cmd = subcommands.add_parser(
        "score",
        help="Score a Stage 5 technique page across 5 quality dimensions",
    )
    # Exactly one data source must be chosen: a local file or a DB slug.
    source_choice = score_cmd.add_mutually_exclusive_group(required=True)
    source_choice.add_argument(
        "--file",
        type=str,
        help="Path to a moments JSON file (creator_name, moments array)",
    )
    source_choice.add_argument(
        "--slug",
        type=str,
        help="Technique slug to load from the database",
    )
    score_cmd.add_argument(
        "--voice-level",
        type=float,
        default=None,
        help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.",
    )

    parsed = arg_parser.parse_args()

    # No subcommand given: show usage and exit with a failure code.
    if parsed.command is None:
        arg_parser.print_help()
        return 1

    if parsed.command == "fitness":
        # Wire the shared LLM client into the fitness harness and run it.
        fitness_runner = FitnessRunner(LLMClient(get_settings()))
        return fitness_runner.run_all()

    if parsed.command == "score":
        return _run_score(parsed)

    return 0
def _run_score(args: argparse.Namespace) -> int:
    """Execute the score subcommand.

    Loads moments from a JSON file (``--file``; DB loading via ``--slug`` is
    not yet implemented), builds a minimal page stub from them, and scores the
    stub with ScoreRunner.

    Args:
        args: Parsed CLI namespace with ``file``, ``slug`` and ``voice_level``.

    Returns:
        0 on success, 1 on any input or scoring error.
    """
    # -- Load source data --
    if args.slug:
        # DB-backed loading is a later task; fail loudly rather than guess.
        print("DB loading not yet implemented", file=sys.stderr)
        return 1
    try:
        # Explicit encoding: JSON files are UTF-8 regardless of platform locale.
        with open(args.file, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"File not found: {args.file}", file=sys.stderr)
        return 1
    except OSError as exc:
        # Permission denied, is-a-directory, etc. previously escaped unhandled.
        print(f"Could not read {args.file}: {exc}", file=sys.stderr)
        return 1
    except json.JSONDecodeError as exc:
        print(f"Invalid JSON in {args.file}: {exc}", file=sys.stderr)
        return 1
    if not isinstance(data, dict):
        # A top-level array/scalar would otherwise raise AttributeError below.
        print(f"Invalid JSON in {args.file}: expected a JSON object", file=sys.stderr)
        return 1
    moments = data.get("moments", [])
    creator_name = data.get("creator_name", "Unknown")
    if not moments:
        print("No moments found in input file", file=sys.stderr)
        return 1
    # -- Build page stub from moments for scoring --
    # When --voice-level is set, T02 will re-synthesize. For now, build a
    # minimal page representation from the moments so the scorer has
    # something to evaluate.
    page_json = {
        "title": f"{creator_name} — Technique Page",
        "creator_name": creator_name,
        "summary": f"Technique page synthesized from {len(moments)} key moments.",
        "body_sections": [
            {
                # First topic tag becomes the heading; generic fallback when
                # the moment has no tags (or an empty tag list).
                "heading": (m.get("topic_tags") or ["Technique"])[0],
                "content": m.get("summary", "") + "\n\n" + m.get("transcript_excerpt", ""),
            }
            for m in moments
        ],
    }
    settings = get_settings()
    client = LLMClient(settings)
    runner = ScoreRunner(client)
    print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...")
    result = runner.score_page(page_json, moments)
    if result.error:
        runner.print_report(result)
        return 1
    runner.print_report(result)
    return 0
# Script entry point: supports `python -m pipeline.quality <command>` and
# propagates main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())