"""FYN-LLM quality assurance toolkit. Subcommands: fitness — Run LLM fitness tests across four categories score — Score a Stage 5 technique page across 5 quality dimensions Run with: python -m pipeline.quality """ from __future__ import annotations import argparse import json import sys from config import get_settings from pipeline.llm_client import LLMClient from .fitness import FitnessRunner from .scorer import ScoreRunner def main() -> int: parser = argparse.ArgumentParser( prog="pipeline.quality", description="FYN-LLM quality assurance toolkit", ) sub = parser.add_subparsers(dest="command") # -- fitness subcommand -- sub.add_parser("fitness", help="Run LLM fitness tests across four categories") # -- score subcommand -- score_parser = sub.add_parser( "score", help="Score a Stage 5 technique page across 5 quality dimensions", ) source_group = score_parser.add_mutually_exclusive_group(required=True) source_group.add_argument( "--file", type=str, help="Path to a moments JSON file (creator_name, moments array)", ) source_group.add_argument( "--slug", type=str, help="Technique slug to load from the database", ) score_parser.add_argument( "--voice-level", type=float, default=None, help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.", ) args = parser.parse_args() if args.command is None: parser.print_help() return 1 if args.command == "fitness": settings = get_settings() client = LLMClient(settings) runner = FitnessRunner(client) return runner.run_all() if args.command == "score": return _run_score(args) return 0 def _run_score(args: argparse.Namespace) -> int: """Execute the score subcommand.""" # -- Load source data -- if args.slug: print("DB loading not yet implemented", file=sys.stderr) return 1 try: with open(args.file) as f: data = json.load(f) except FileNotFoundError: print(f"File not found: {args.file}", file=sys.stderr) return 1 except json.JSONDecodeError as exc: print(f"Invalid JSON in {args.file}: {exc}", file=sys.stderr) return 1 moments = data.get("moments", []) creator_name = data.get("creator_name", "Unknown") if not moments: print("No moments found in input file", file=sys.stderr) return 1 settings = get_settings() client = LLMClient(settings) runner = ScoreRunner(client) # -- Voice-level mode: re-synthesize then score -- if args.voice_level is not None: voice_level = args.voice_level if not (0.0 <= voice_level <= 1.0): print("--voice-level must be between 0.0 and 1.0", file=sys.stderr) return 1 print(f"\nRe-synthesizing + scoring for '{creator_name}' ({len(moments)} moments, voice_level={voice_level})...") result = runner.synthesize_and_score(moments, creator_name, voice_level) if result.error: runner.print_report(result) return 1 runner.print_report(result) return 0 # -- Standard mode: build page stub from moments, score directly -- page_json = { "title": f"{creator_name} — Technique Page", "creator_name": creator_name, "summary": f"Technique page synthesized from {len(moments)} key moments.", "body_sections": [ { "heading": m.get("topic_tags", ["Technique"])[0] if m.get("topic_tags") else "Technique", "content": m.get("summary", "") + "\n\n" + m.get("transcript_excerpt", ""), } for m in moments ], } print(f"\nScoring page for '{creator_name}' ({len(moments)} moments)...") result = runner.score_page(page_json, moments) if result.error: runner.print_report(result) return 1 runner.print_report(result) return 0 if __name__ == "__main__": sys.exit(main())