"""FYN-LLM quality assurance toolkit. Subcommands: fitness — Run LLM fitness tests across four categories score — Score a Stage 5 technique page across 5 quality dimensions Run with: python -m pipeline.quality """ from __future__ import annotations import argparse import json import sys from config import get_settings from pipeline.llm_client import LLMClient from .fitness import FitnessRunner from .scorer import ScoreRunner def main() -> int: parser = argparse.ArgumentParser( prog="pipeline.quality", description="FYN-LLM quality assurance toolkit", ) sub = parser.add_subparsers(dest="command") # -- fitness subcommand -- sub.add_parser("fitness", help="Run LLM fitness tests across four categories") # -- score subcommand -- score_parser = sub.add_parser( "score", help="Score a Stage 5 technique page across 5 quality dimensions", ) source_group = score_parser.add_mutually_exclusive_group(required=True) source_group.add_argument( "--file", type=str, help="Path to a moments JSON file (creator_name, moments array)", ) source_group.add_argument( "--slug", type=str, help="Technique slug to load from the database", ) score_parser.add_argument( "--voice-level", type=float, default=None, help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.", ) args = parser.parse_args() if args.command is None: parser.print_help() return 1 if args.command == "fitness": settings = get_settings() client = LLMClient(settings) runner = FitnessRunner(client) return runner.run_all() if args.command == "score": return _run_score(args) return 0 def _run_score(args: argparse.Namespace) -> int: """Execute the score subcommand.""" # -- Load source data -- if args.slug: print("DB loading not yet implemented", file=sys.stderr) return 1 try: with open(args.file) as f: data = json.load(f) except FileNotFoundError: print(f"File not found: {args.file}", file=sys.stderr) return 1 except json.JSONDecodeError as exc: print(f"Invalid JSON in {args.file}: {exc}", file=sys.stderr) return 1 moments = data.get("moments", []) creator_name = data.get("creator_name", "Unknown") if not moments: print("No moments found in input file", file=sys.stderr) return 1 # -- Build page stub from moments for scoring -- # When --voice-level is set, T02 will re-synthesize. For now, build a # minimal page representation from the moments so the scorer has # something to evaluate. page_json = { "title": f"{creator_name} — Technique Page", "creator_name": creator_name, "summary": f"Technique page synthesized from {len(moments)} key moments.", "body_sections": [ { "heading": m.get("topic_tags", ["Technique"])[0] if m.get("topic_tags") else "Technique", "content": m.get("summary", "") + "\n\n" + m.get("transcript_excerpt", ""), } for m in moments ], } settings = get_settings() client = LLMClient(settings) runner = ScoreRunner(client) print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...") result = runner.score_page(page_json, moments) if result.error: runner.print_report(result) return 1 runner.print_report(result) return 0 if __name__ == "__main__": sys.exit(main())