perf: Added optimize CLI subcommand with leaderboard table, ASCII traje…
- "backend/pipeline/quality/__main__.py" - "backend/pipeline/quality/results/.gitkeep" GSD-Task: S03/T02
This commit is contained in:
parent
0d82b2b409
commit
03373f263d
2 changed files with 246 additions and 3 deletions
|
|
@ -3,6 +3,7 @@
|
||||||
Subcommands:
|
Subcommands:
|
||||||
fitness — Run LLM fitness tests across four categories
|
fitness — Run LLM fitness tests across four categories
|
||||||
score — Score a Stage 5 technique page across 5 quality dimensions
|
score — Score a Stage 5 technique page across 5 quality dimensions
|
||||||
|
optimize — Automated prompt optimization loop with leaderboard output
|
||||||
|
|
||||||
Run with: python -m pipeline.quality <command>
|
Run with: python -m pipeline.quality <command>
|
||||||
"""
|
"""
|
||||||
|
|
@ -11,12 +12,150 @@ from __future__ import annotations
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from config import get_settings
|
from config import get_settings
|
||||||
from pipeline.llm_client import LLMClient
|
from pipeline.llm_client import LLMClient
|
||||||
|
|
||||||
from .fitness import FitnessRunner
|
from .fitness import FitnessRunner
|
||||||
from .scorer import ScoreRunner
|
from .optimizer import OptimizationLoop, OptimizationResult
|
||||||
|
from .scorer import DIMENSIONS, ScoreRunner
|
||||||
|
|
||||||
|
|
||||||
|
# ── Reporting helpers ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def print_leaderboard(result: OptimizationResult) -> None:
    """Print a formatted leaderboard of the top 5 variants by composite score.

    Reads ``result.history`` (a list of dict entries). Entries with a truthy
    ``"error"`` value are skipped; the rest are ranked by ``"composite"`` in
    descending order. Each row shows the rank, the label (truncated to 16
    characters), the composite score, one column per name in ``DIMENSIONS``
    (taken from the entry's ``"scores"`` mapping, ``0.0`` when absent), and a
    20-cell bar visualising the composite.

    Args:
        result: Optimization result whose ``history`` entries are rendered.
    """
    # Filter to entries that actually scored (no errors)
    scored = [h for h in result.history if not h.get("error")]
    if not scored:
        print("\n No successfully scored variants to rank.\n")
        return

    ranked = sorted(scored, key=lambda h: h["composite"], reverse=True)[:5]

    rule = "=" * 72
    print(f"\n{rule}")
    print(" LEADERBOARD — Top 5 Variants by Composite Score")
    print(rule)

    # Header — the separator row is derived from DIMENSIONS so it always has
    # exactly as many columns as the header and data rows.
    dim_headers = " ".join(f"{d[:5]:>5s}" for d in DIMENSIONS)
    dim_rules = " ".join("─" * 5 for _ in DIMENSIONS)
    print(f" {'#':>2s} {'Label':<16s} {'Comp':>5s} {dim_headers}")
    print(f" {'─' * 2} {'─' * 16} {'─' * 5} {dim_rules}")

    bar_width = 20
    for rank, entry in enumerate(ranked, 1):
        # A missing or None label falls back to "?" instead of crashing.
        label = str(entry.get("label") or "?")[:16]
        comp = entry["composite"]
        scores = entry.get("scores") or {}
        dim_vals = " ".join(f"{scores.get(d, 0.0):5.2f}" for d in DIMENSIONS)
        # Clamp so a composite outside [0, 1] cannot distort the bar width.
        filled = max(0, min(bar_width, int(comp * bar_width)))
        bar = "█" * filled + "░" * (bar_width - filled)
        print(f" {rank:>2d} {label:<16s} {comp:5.3f} {dim_vals} {bar}")

    print(f"{rule}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def print_trajectory(result: OptimizationResult) -> None:
    """Print an ASCII chart of the best composite score per iteration.

    Reads ``result.history`` (a list of dict entries), skips entries with a
    truthy ``"error"`` value, and keeps the highest ``"composite"`` seen for
    each ``"iteration"``. Each iteration becomes one three-character column
    that is filled with ``●`` from the bottom row up to its scaled height.

    The Y range is the observed range padded by 0.05 and clipped to
    ``[0.0, 1.0]``; a degenerate range falls back to a span of 0.1.

    Args:
        result: Optimization result whose ``history`` entries are charted.
    """
    scored = [h for h in result.history if not h.get("error")]
    if len(scored) < 2:
        print(" (Not enough data points for trajectory chart)\n")
        return

    # Get the best composite per iteration
    iter_best: dict[int, float] = {}
    for h in scored:
        it = h["iteration"]
        if it not in iter_best or h["composite"] > iter_best[it]:
            iter_best[it] = h["composite"]

    iterations = sorted(iter_best)
    values = [iter_best[it] for it in iterations]

    # Chart dimensions
    chart_height = 15
    label_width = 5  # shared by Y labels, blank gutters and the axis rule
    min_val = max(0.0, min(values) - 0.05)
    max_val = min(1.0, max(values) + 0.05)
    val_range = max_val - min_val
    if val_range < 0.01:
        # Degenerate (or inverted) range: use a fixed span anchored on the
        # first value so the division below stays well-defined.
        val_range = 0.1
        min_val = max(0.0, values[0] - 0.05)

    print(f" {'─' * 50}")
    print(" SCORE TRAJECTORY — Best Composite per Iteration")
    print(f" {'─' * 50}")
    print()

    # Column heights do not depend on the row, so compute them once.
    heights = [
        int(((val - min_val) / val_range) * chart_height) for val in values
    ]
    blank_label = " " * label_width

    # Render rows top to bottom
    for row in range(chart_height, -1, -1):
        # Y-axis label every 5 rows; other rows get a blank of equal width so
        # the vertical axis stays in one column.
        if row % 5 == 0:
            threshold = min_val + (row / chart_height) * val_range
            label = f"{threshold:{label_width}.2f}"
        else:
            label = blank_label
        cells = "".join(" ● " if h >= row else " · " for h in heights)
        print(f" {label} │{cells}")

    # X-axis — the gutter matches the width of " <label> │" used above.
    gutter = " " * (label_width + 3)
    print(f" {'─' * label_width} ┼{'───' * len(values)}")
    print(gutter + "".join(f"{it:>2d} " for it in iterations))
    print(f"{gutter}iteration →")
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def write_results_json(
    result: OptimizationResult,
    output_dir: str,
    stage: int,
    iterations: int,
    variants_per_iter: int,
    fixture_path: str,
) -> str:
    """Persist an optimization run as a timestamped JSON report.

    The directory is created if needed and the file is named
    ``optimize_stage<stage>_<UTC timestamp>.json``. The report holds the run
    configuration, the best prompt, the best composite and per-dimension
    scores (one per name in ``DIMENSIONS``), the elapsed seconds, and the
    full history.

    Args:
        result: Optimization result to serialize.
        output_dir: Directory that receives the report.
        stage: Pipeline stage recorded in the config and the filename.
        iterations: Iteration count recorded in the config.
        variants_per_iter: Variants per iteration recorded in the config.
        fixture_path: Fixture path recorded in the config.

    Returns:
        The path of the written file, as a string.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # UTC timestamp at one-second resolution.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    destination = target_dir / f"optimize_stage{stage}_{stamp}.json"

    run_config = {
        "stage": stage,
        "iterations": iterations,
        "variants_per_iter": variants_per_iter,
        "fixture_path": fixture_path,
    }

    # Composite first, then one entry per scoring dimension.
    best_scores = {"composite": result.best_score.composite}
    for dim in DIMENSIONS:
        best_scores[dim] = getattr(result.best_score, dim)

    report = {
        "config": run_config,
        "best_prompt": result.best_prompt,
        "best_scores": best_scores,
        "elapsed_seconds": result.elapsed_seconds,
        "history": result.history,
    }

    serialized = json.dumps(report, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return str(destination)
|
||||||
|
|
||||||
|
|
||||||
|
# ── CLI ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
|
|
@ -52,6 +191,42 @@ def main() -> int:
|
||||||
help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.",
|
help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# -- optimize subcommand --
|
||||||
|
opt_parser = sub.add_parser(
|
||||||
|
"optimize",
|
||||||
|
help="Automated prompt optimization loop with leaderboard output",
|
||||||
|
)
|
||||||
|
opt_parser.add_argument(
|
||||||
|
"--stage",
|
||||||
|
type=int,
|
||||||
|
default=5,
|
||||||
|
help="Pipeline stage to optimize (default: 5)",
|
||||||
|
)
|
||||||
|
opt_parser.add_argument(
|
||||||
|
"--iterations",
|
||||||
|
type=int,
|
||||||
|
default=10,
|
||||||
|
help="Number of optimization iterations (default: 10)",
|
||||||
|
)
|
||||||
|
opt_parser.add_argument(
|
||||||
|
"--variants-per-iter",
|
||||||
|
type=int,
|
||||||
|
default=2,
|
||||||
|
help="Variants generated per iteration (default: 2)",
|
||||||
|
)
|
||||||
|
opt_parser.add_argument(
|
||||||
|
"--file",
|
||||||
|
type=str,
|
||||||
|
required=True,
|
||||||
|
help="Path to moments JSON fixture file",
|
||||||
|
)
|
||||||
|
opt_parser.add_argument(
|
||||||
|
"--output-dir",
|
||||||
|
type=str,
|
||||||
|
default="backend/pipeline/quality/results/",
|
||||||
|
help="Directory to write result JSON (default: backend/pipeline/quality/results/)",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.command is None:
|
if args.command is None:
|
||||||
|
|
@ -67,6 +242,9 @@ def main() -> int:
|
||||||
if args.command == "score":
|
if args.command == "score":
|
||||||
return _run_score(args)
|
return _run_score(args)
|
||||||
|
|
||||||
|
if args.command == "optimize":
|
||||||
|
return _run_optimize(args)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -141,5 +319,70 @@ def _run_score(args: argparse.Namespace) -> int:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _run_optimize(args: argparse.Namespace) -> int:
    """Execute the optimize subcommand.

    Validates the arguments, runs the optimization loop, prints the
    leaderboard and trajectory chart, and writes the results JSON.

    Args:
        args: Parsed CLI arguments; reads ``stage``, ``file``, ``output_dir``,
            ``iterations`` and ``variants_per_iter``.

    Returns:
        Process exit code: ``0`` on success (also when only the results file
        could not be written), ``1`` on validation or optimization failure,
        ``130`` when interrupted by the user.
    """
    # Stage validation — only stage 5 is supported
    if args.stage != 5:
        print(
            f"Error: only stage 5 is supported for optimization (got stage {args.stage})",
            file=sys.stderr,
        )
        return 1

    # Validate the fixture is an existing regular file; a directory would pass
    # a plain existence check and only fail later inside the loop.
    fixture = Path(args.file)
    if not fixture.is_file():
        print(f"Error: fixture file not found: {args.file}", file=sys.stderr)
        return 1

    # Ensure output dir up front so an unusable destination is reported
    # before the optimization loop runs.
    try:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        print(
            f"Error: cannot create output directory {args.output_dir}: {exc}",
            file=sys.stderr,
        )
        return 1

    settings = get_settings()
    client = LLMClient(settings)

    loop = OptimizationLoop(
        client=client,
        stage=args.stage,
        fixture_path=args.file,
        iterations=args.iterations,
        variants_per_iter=args.variants_per_iter,
    )

    try:
        result = loop.run()
    except KeyboardInterrupt:
        print("\n Optimization interrupted by user.", file=sys.stderr)
        return 130
    except Exception as exc:
        # Top-level CLI boundary: report any failure as an exit code.
        print(f"\nError: optimization failed: {exc}", file=sys.stderr)
        return 1

    # If the loop returned an error on baseline, report and exit
    if result.best_score.error and not result.history:
        print(f"\nError: {result.best_score.error}", file=sys.stderr)
        return 1

    # Reporting
    print_leaderboard(result)
    print_trajectory(result)

    # Write results JSON. Best-effort: the report has already been printed,
    # so a write or serialization failure only produces a warning.
    try:
        json_path = write_results_json(
            result=result,
            output_dir=args.output_dir,
            stage=args.stage,
            iterations=args.iterations,
            variants_per_iter=args.variants_per_iter,
            fixture_path=args.file,
        )
    except (OSError, TypeError, ValueError) as exc:
        print(f" Warning: failed to write results JSON: {exc}", file=sys.stderr)
    else:
        print(f" Results written to: {json_path}")

    return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|
|
||||||
0
backend/pipeline/quality/results/.gitkeep
Normal file
0
backend/pipeline/quality/results/.gitkeep
Normal file
Loading…
Add table
Reference in a new issue