From 03373f263dd8a3967475047e10b590c02fd7f186 Mon Sep 17 00:00:00 2001 From: jlightner Date: Wed, 1 Apr 2026 09:10:42 +0000 Subject: [PATCH] =?UTF-8?q?perf:=20Added=20optimize=20CLI=20subcommand=20w?= =?UTF-8?q?ith=20leaderboard=20table,=20ASCII=20traje=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - "backend/pipeline/quality/__main__.py" - "backend/pipeline/quality/results/.gitkeep" GSD-Task: S03/T02 --- backend/pipeline/quality/__main__.py | 249 +++++++++++++++++++++- backend/pipeline/quality/results/.gitkeep | 0 2 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 backend/pipeline/quality/results/.gitkeep diff --git a/backend/pipeline/quality/__main__.py b/backend/pipeline/quality/__main__.py index 8811ee6..7b9a1e2 100644 --- a/backend/pipeline/quality/__main__.py +++ b/backend/pipeline/quality/__main__.py @@ -1,8 +1,9 @@ """FYN-LLM quality assurance toolkit. Subcommands: - fitness — Run LLM fitness tests across four categories - score — Score a Stage 5 technique page across 5 quality dimensions + fitness — Run LLM fitness tests across four categories + score — Score a Stage 5 technique page across 5 quality dimensions + optimize — Automated prompt optimization loop with leaderboard output Run with: python -m pipeline.quality """ @@ -11,12 +12,150 @@ from __future__ import annotations import argparse import json import sys +from datetime import datetime, timezone +from pathlib import Path from config import get_settings from pipeline.llm_client import LLMClient from .fitness import FitnessRunner -from .scorer import ScoreRunner +from .optimizer import OptimizationLoop, OptimizationResult +from .scorer import DIMENSIONS, ScoreRunner + + +# ── Reporting helpers ──────────────────────────────────────────────────────── + + +def print_leaderboard(result: OptimizationResult) -> None: + """Print a formatted leaderboard of top 5 variants by composite score.""" + # Filter to entries that actually scored (no errors) + scored = [h for h in result.history if not h.get("error")] + if not scored: + print("\n No successfully scored variants to rank.\n") + return + + ranked = sorted(scored, key=lambda h: h["composite"], reverse=True)[:5] + + print(f"\n{'='*72}") + print(" LEADERBOARD — Top 5 Variants by Composite Score") + print(f"{'='*72}") + + # Header + dim_headers = " ".join(f"{d[:5]:>5s}" for d in DIMENSIONS) + print(f" {'#':>2s} {'Label':<16s} {'Comp':>5s} {dim_headers}") + print(f" {'─'*2} {'─'*16} {'─'*5} {'─'*5} {'─'*5} {'─'*5} {'─'*5} {'─'*5}") + + for i, entry in enumerate(ranked, 1): + label = entry.get("label", "?")[:16] + comp = entry["composite"] + dim_vals = " ".join( + f"{entry['scores'].get(d, 0.0):5.2f}" for d in DIMENSIONS + ) + bar = "█" * int(comp * 20) + "░" * (20 - int(comp * 20)) + print(f" {i:>2d} {label:<16s} {comp:5.3f} {dim_vals} {bar}") + + print(f"{'='*72}\n") + + +def print_trajectory(result: OptimizationResult) -> None: + """Print an ASCII chart of composite score across iterations.""" + scored = [h for h in result.history if not h.get("error")] + if len(scored) < 2: + print(" (Not enough data points for trajectory chart)\n") + return + + # Get the best composite per iteration + iter_best: dict[int, float] = {} + for h in scored: + it = h["iteration"] + if it not in iter_best or h["composite"] > iter_best[it]: + iter_best[it] = h["composite"] + + iterations = sorted(iter_best.keys()) + values = [iter_best[it] for it in iterations] + + # Chart dimensions + chart_height = 15 + min_val = max(0.0, min(values) - 0.05) + max_val = min(1.0, max(values) + 0.05) + val_range = max_val - min_val + if val_range < 0.01: + val_range = 0.1 + min_val = max(0.0, values[0] - 0.05) + max_val = min_val + val_range + + print(f" {'─'*50}") + print(" SCORE TRAJECTORY — Best Composite per Iteration") + print(f" {'─'*50}") + print() + + # Render rows top to bottom + for row in range(chart_height, -1, -1): + threshold = min_val + (row / chart_height) * val_range + # Y-axis label every 5 rows + if row % 5 == 0: + label = f"{threshold:.2f}" + else: + label = " " + line = f" {label} │" + + for vi, val in enumerate(values): + normalized = (val - min_val) / val_range + filled_rows = int(normalized * chart_height) + if filled_rows >= row: + line += " ● " + else: + line += " · " + + print(line) + + # X-axis + print(f" ───── ┼{'───' * len(values)}") + x_labels = " " + " " + for it in iterations: + x_labels += f"{it:>2d} " + print(x_labels) + print(" " + " iteration →") + print() + + +def write_results_json( + result: OptimizationResult, + output_dir: str, + stage: int, + iterations: int, + variants_per_iter: int, + fixture_path: str, +) -> str: + """Write optimization results to a timestamped JSON file. Returns the path.""" + out_path = Path(output_dir) + out_path.mkdir(parents=True, exist_ok=True) + + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + filename = f"optimize_stage{stage}_{timestamp}.json" + filepath = out_path / filename + + payload = { + "config": { + "stage": stage, + "iterations": iterations, + "variants_per_iter": variants_per_iter, + "fixture_path": fixture_path, + }, + "best_prompt": result.best_prompt, + "best_scores": { + "composite": result.best_score.composite, + **{d: getattr(result.best_score, d) for d in DIMENSIONS}, + }, + "elapsed_seconds": result.elapsed_seconds, + "history": result.history, + } + + filepath.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return str(filepath) + + +# ── CLI ────────────────────────────────────────────────────────────────────── def main() -> int: @@ -52,6 +191,42 @@ def main() -> int: help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.", ) + # -- optimize subcommand -- + opt_parser = sub.add_parser( + "optimize", + help="Automated prompt optimization loop with leaderboard output", + ) + opt_parser.add_argument( + "--stage", + type=int, + default=5, + help="Pipeline stage to optimize (default: 5)", + ) + opt_parser.add_argument( + "--iterations", + type=int, + default=10, + help="Number of optimization iterations (default: 10)", + ) + opt_parser.add_argument( + "--variants-per-iter", + type=int, + default=2, + help="Variants generated per iteration (default: 2)", + ) + opt_parser.add_argument( + "--file", + type=str, + required=True, + help="Path to moments JSON fixture file", + ) + opt_parser.add_argument( + "--output-dir", + type=str, + default="backend/pipeline/quality/results/", + help="Directory to write result JSON (default: backend/pipeline/quality/results/)", + ) + args = parser.parse_args() if args.command is None: @@ -67,6 +242,9 @@ def main() -> int: if args.command == "score": return _run_score(args) + if args.command == "optimize": + return _run_optimize(args) + return 0 @@ -141,5 +319,70 @@ def _run_score(args: argparse.Namespace) -> int: return 0 +def _run_optimize(args: argparse.Namespace) -> int: + """Execute the optimize subcommand.""" + # Stage validation — only stage 5 is supported + if args.stage != 5: + print( + f"Error: only stage 5 is supported for optimization (got stage {args.stage})", + file=sys.stderr, + ) + return 1 + + # Validate fixture file exists + fixture = Path(args.file) + if not fixture.exists(): + print(f"Error: fixture file not found: {args.file}", file=sys.stderr) + return 1 + + # Ensure output dir + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + + settings = get_settings() + client = LLMClient(settings) + + loop = OptimizationLoop( + client=client, + stage=args.stage, + fixture_path=args.file, + iterations=args.iterations, + variants_per_iter=args.variants_per_iter, + ) + + try: + result = loop.run() + except KeyboardInterrupt: + print("\n Optimization interrupted by user.", file=sys.stderr) + return 130 + except Exception as exc: + print(f"\nError: optimization failed: {exc}", file=sys.stderr) + return 1 + + # If the loop returned an error on baseline, report and exit + if result.best_score.error and not result.history: + print(f"\nError: {result.best_score.error}", file=sys.stderr) + return 1 + + # Reporting + print_leaderboard(result) + print_trajectory(result) + + # Write results JSON + try: + json_path = write_results_json( + result=result, + output_dir=args.output_dir, + stage=args.stage, + iterations=args.iterations, + variants_per_iter=args.variants_per_iter, + fixture_path=args.file, + ) + print(f" Results written to: {json_path}") + except OSError as exc: + print(f" Warning: failed to write results JSON: {exc}", file=sys.stderr) + + return 0 + + if __name__ == "__main__": sys.exit(main()) diff --git a/backend/pipeline/quality/results/.gitkeep b/backend/pipeline/quality/results/.gitkeep new file mode 100644 index 0000000..e69de29