perf: Added optimize CLI subcommand with leaderboard table, ASCII trajectory chart, and timestamped results JSON output
- "backend/pipeline/quality/__main__.py" - "backend/pipeline/quality/results/.gitkeep" GSD-Task: S03/T02
This commit is contained in:
parent
c6cbb09dd3
commit
84e85a52b3
5 changed files with 334 additions and 4 deletions
|
|
@ -42,7 +42,7 @@ The existing `ScoreRunner.synthesize_and_score()` already chains Stage 5 synthes
|
|||
- Estimate: 1.5h
|
||||
- Files: backend/pipeline/quality/variant_generator.py, backend/pipeline/quality/optimizer.py
|
||||
- Verify: python -c "from pipeline.quality.variant_generator import PromptVariantGenerator; print('generator ok')" && python -c "from pipeline.quality.optimizer import OptimizationLoop, OptimizationResult; print('optimizer ok')"
|
||||
- [ ] **T02: Wire optimize CLI subcommand with leaderboard and trajectory output** — Add the `optimize` subcommand to `__main__.py` that connects PromptVariantGenerator + OptimizationLoop to the CLI, and add formatted reporting: a leaderboard table and an ASCII score trajectory chart. Write results to a JSON file.
|
||||
- [x] **T02: Added optimize CLI subcommand with leaderboard table, ASCII trajectory chart, and timestamped results JSON output** — Add the `optimize` subcommand to `__main__.py` that connects PromptVariantGenerator + OptimizationLoop to the CLI, and add formatted reporting: a leaderboard table and an ASCII score trajectory chart. Write results to a JSON file.
|
||||
|
||||
## Context
|
||||
|
||||
|
|
|
|||
9
.gsd/milestones/M013/slices/S03/tasks/T01-VERIFY.json
Normal file
9
.gsd/milestones/M013/slices/S03/tasks/T01-VERIFY.json
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"schemaVersion": 1,
|
||||
"taskId": "T01",
|
||||
"unitId": "M013/S03/T01",
|
||||
"timestamp": 1775034481351,
|
||||
"passed": true,
|
||||
"discoverySource": "none",
|
||||
"checks": []
|
||||
}
|
||||
78
.gsd/milestones/M013/slices/S03/tasks/T02-SUMMARY.md
Normal file
78
.gsd/milestones/M013/slices/S03/tasks/T02-SUMMARY.md
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
---
|
||||
id: T02
|
||||
parent: S03
|
||||
milestone: M013
|
||||
provides: []
|
||||
requires: []
|
||||
affects: []
|
||||
key_files: ["backend/pipeline/quality/__main__.py", "backend/pipeline/quality/results/.gitkeep"]
|
||||
key_decisions: ["Reporting functions live in __main__.py rather than separate reporting.py — keeps surface small"]
|
||||
patterns_established: []
|
||||
drill_down_paths: []
|
||||
observability_surfaces: []
|
||||
duration: ""
|
||||
verification_result: "Verified --help shows all 5 args, --stage 3 prints 'only stage 5 supported' and exits 1, nonexistent fixture prints error and exits 1, synthetic data through leaderboard and trajectory renders correctly, JSON writer produces valid output with expected keys, and the full plan verification command exits 0."
|
||||
completed_at: 2026-04-01T09:10:39.869Z
|
||||
blocker_discovered: false
|
||||
---
|
||||
|
||||
# T02: Added optimize CLI subcommand with leaderboard table, ASCII trajectory chart, and timestamped results JSON output
|
||||
|
||||
> Added optimize CLI subcommand with leaderboard table, ASCII trajectory chart, and timestamped results JSON output
|
||||
|
||||
## What Happened
|
||||
---
|
||||
id: T02
|
||||
parent: S03
|
||||
milestone: M013
|
||||
key_files:
|
||||
- backend/pipeline/quality/__main__.py
|
||||
- backend/pipeline/quality/results/.gitkeep
|
||||
key_decisions:
|
||||
- Reporting functions live in __main__.py rather than separate reporting.py — keeps surface small
|
||||
duration: ""
|
||||
verification_result: passed
|
||||
completed_at: 2026-04-01T09:10:39.869Z
|
||||
blocker_discovered: false
|
||||
---
|
||||
|
||||
# T02: Added optimize CLI subcommand with leaderboard table, ASCII trajectory chart, and timestamped results JSON output
|
||||
|
||||
**Added optimize CLI subcommand with leaderboard table, ASCII trajectory chart, and timestamped results JSON output**
|
||||
|
||||
## What Happened
|
||||
|
||||
Extended the quality CLI __main__.py with an `optimize` subparser accepting --stage, --iterations, --variants-per-iter, --file, and --output-dir. Stage validation rejects non-5 stages with a clear error. Three reporting functions added: print_leaderboard() shows top 5 variants ranked by composite with per-dimension breakdown; print_trajectory() renders a 15-row ASCII chart of best composite per iteration; write_results_json() persists full results with config metadata. Created results/.gitkeep for the output directory.
|
||||
|
||||
## Verification
|
||||
|
||||
Verified --help shows all 5 args, --stage 3 prints 'only stage 5 supported' and exits 1, nonexistent fixture prints error and exits 1, synthetic data through leaderboard and trajectory renders correctly, JSON writer produces valid output with expected keys, and the full plan verification command exits 0.
|
||||
|
||||
## Verification Evidence
|
||||
|
||||
| # | Command | Exit Code | Verdict | Duration |
|
||||
|---|---------|-----------|---------|----------|
|
||||
| 1 | `python -m pipeline.quality optimize --help` | 0 | ✅ pass | 400ms |
|
||||
| 2 | `python -m pipeline.quality optimize --stage 3 --iterations 1 --file fixtures/sample_moments.json` | 1 | ✅ pass (expected exit 1) | 300ms |
|
||||
| 3 | `python -m pipeline.quality optimize --help && ... | grep -q 'stage 5'` | 0 | ✅ pass | 500ms |
|
||||
|
||||
|
||||
## Deviations
|
||||
|
||||
None.
|
||||
|
||||
## Known Issues
|
||||
|
||||
None.
|
||||
|
||||
## Files Created/Modified
|
||||
|
||||
- `backend/pipeline/quality/__main__.py`
|
||||
- `backend/pipeline/quality/results/.gitkeep`
|
||||
|
||||
|
||||
## Deviations
|
||||
None.
|
||||
|
||||
## Known Issues
|
||||
None.
|
||||
|
|
@ -1,8 +1,9 @@
|
|||
"""FYN-LLM quality assurance toolkit.
|
||||
|
||||
Subcommands:
|
||||
fitness — Run LLM fitness tests across four categories
|
||||
score — Score a Stage 5 technique page across 5 quality dimensions
|
||||
fitness — Run LLM fitness tests across four categories
|
||||
score — Score a Stage 5 technique page across 5 quality dimensions
|
||||
optimize — Automated prompt optimization loop with leaderboard output
|
||||
|
||||
Run with: python -m pipeline.quality <command>
|
||||
"""
|
||||
|
|
@ -11,12 +12,150 @@ from __future__ import annotations
|
|||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from config import get_settings
|
||||
from pipeline.llm_client import LLMClient
|
||||
|
||||
from .fitness import FitnessRunner
|
||||
from .scorer import ScoreRunner
|
||||
from .optimizer import OptimizationLoop, OptimizationResult
|
||||
from .scorer import DIMENSIONS, ScoreRunner
|
||||
|
||||
|
||||
# ── Reporting helpers ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def print_leaderboard(result: OptimizationResult) -> None:
    """Print a leaderboard of the top 5 variants ranked by composite score.

    History entries carrying an ``error`` key are excluded. If nothing
    scored successfully, a short notice is printed instead of a table.
    """
    # Filter to entries that actually scored (no errors).
    scored = [h for h in result.history if not h.get("error")]
    if not scored:
        print("\n No successfully scored variants to rank.\n")
        return

    ranked = sorted(scored, key=lambda h: h["composite"], reverse=True)[:5]

    print(f"\n{'='*72}")
    print(" LEADERBOARD — Top 5 Variants by Composite Score")
    print(f"{'='*72}")

    # Header row: rank, label, composite, then one 5-char column per dimension.
    dim_headers = " ".join(f"{d[:5]:>5s}" for d in DIMENSIONS)
    print(f" {'#':>2s} {'Label':<16s} {'Comp':>5s} {dim_headers}")
    # Separator is derived from DIMENSIONS so it stays aligned if the
    # dimension set changes (previously it hardcoded exactly 5 columns).
    dim_rules = " ".join("─" * 5 for _ in DIMENSIONS)
    print(f" {'─'*2} {'─'*16} {'─'*5} {dim_rules}")

    for rank, entry in enumerate(ranked, 1):
        label = entry.get("label", "?")[:16]
        comp = entry["composite"]
        dim_vals = " ".join(
            f"{entry['scores'].get(d, 0.0):5.2f}" for d in DIMENSIONS
        )
        # 20-char bar proportional to composite, clamped so values outside
        # [0, 1] cannot overflow (or underflow) the bar width.
        filled = min(20, max(0, int(comp * 20)))
        bar = "█" * filled + "░" * (20 - filled)
        print(f" {rank:>2d} {label:<16s} {comp:5.3f} {dim_vals} {bar}")

    print(f"{'='*72}\n")
|
||||
|
||||
|
||||
def print_trajectory(result: OptimizationResult) -> None:
    """Print an ASCII chart of the best composite score per iteration.

    History entries carrying an ``error`` key are ignored. At least two
    scored entries are required; otherwise a short notice is printed.
    """
    scored = [h for h in result.history if not h.get("error")]
    if len(scored) < 2:
        print(" (Not enough data points for trajectory chart)\n")
        return

    # Best composite observed in each iteration.
    iter_best: dict[int, float] = {}
    for h in scored:
        it = h["iteration"]
        if it not in iter_best or h["composite"] > iter_best[it]:
            iter_best[it] = h["composite"]

    iterations = sorted(iter_best)
    values = [iter_best[it] for it in iterations]

    # Chart geometry: pad the value range by 0.05 on each side, clamped to
    # [0, 1]. If the series is essentially flat, widen to a 0.1 span so the
    # normalization below never divides by ~zero.
    chart_height = 15
    min_val = max(0.0, min(values) - 0.05)
    max_val = min(1.0, max(values) + 0.05)
    val_range = max_val - min_val
    if val_range < 0.01:
        val_range = 0.1
        min_val = max(0.0, values[0] - 0.05)
        max_val = min_val + val_range

    # Single width constant for the y-axis gutter so the row labels and the
    # x-axis rule line up (previously the label was 4 chars wide but the
    # axis rule used 5 dashes, misaligning the axis with the plot rows).
    label_width = 5

    print(f" {'─'*50}")
    print(" SCORE TRAJECTORY — Best Composite per Iteration")
    print(f" {'─'*50}")
    print()

    # Render rows top to bottom; a point is drawn in every row at or below
    # its normalized height, producing a filled column per iteration.
    for row in range(chart_height, -1, -1):
        threshold = min_val + (row / chart_height) * val_range
        # Label the y-axis every 5 rows only, to keep the gutter uncluttered.
        label = f"{threshold:.2f}" if row % 5 == 0 else ""
        line = f" {label:>{label_width}s} │"
        for val in values:
            normalized = (val - min_val) / val_range
            filled_rows = int(normalized * chart_height)
            line += " ● " if filled_rows >= row else " · "
        print(line)

    # X-axis rule and labels share the gutter width used by the rows above.
    print(f" {'─' * label_width} ┼{'───' * len(values)}")
    x_labels = " " + " " * (label_width + 2)
    for it in iterations:
        x_labels += f"{it:>2d} "
    print(x_labels)
    print(" " + " " * (label_width + 2) + "iteration →")
    print()
|
||||
|
||||
|
||||
def write_results_json(
    result: OptimizationResult,
    output_dir: str,
    stage: int,
    iterations: int,
    variants_per_iter: int,
    fixture_path: str,
) -> str:
    """Persist optimization results as a timestamped JSON file.

    The file is named ``optimize_stage<stage>_<UTC timestamp>.json`` and
    written under *output_dir* (created if missing). Returns the written
    file's path as a string.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # UTC timestamp keeps filenames sortable and collision-resistant.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    destination = target_dir / f"optimize_stage{stage}_{stamp}.json"

    # Composite first, then each quality dimension in DIMENSIONS order.
    best_scores = {"composite": result.best_score.composite}
    for dim in DIMENSIONS:
        best_scores[dim] = getattr(result.best_score, dim)

    document = {
        "config": {
            "stage": stage,
            "iterations": iterations,
            "variants_per_iter": variants_per_iter,
            "fixture_path": fixture_path,
        },
        "best_prompt": result.best_prompt,
        "best_scores": best_scores,
        "elapsed_seconds": result.elapsed_seconds,
        "history": result.history,
    }

    destination.write_text(json.dumps(document, indent=2), encoding="utf-8")
    return str(destination)
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> int:
|
||||
|
|
@ -52,6 +191,42 @@ def main() -> int:
|
|||
help="Voice preservation dial (0.0=clinical, 1.0=maximum voice). Triggers re-synthesis before scoring.",
|
||||
)
|
||||
|
||||
# -- optimize subcommand --
|
||||
opt_parser = sub.add_parser(
|
||||
"optimize",
|
||||
help="Automated prompt optimization loop with leaderboard output",
|
||||
)
|
||||
opt_parser.add_argument(
|
||||
"--stage",
|
||||
type=int,
|
||||
default=5,
|
||||
help="Pipeline stage to optimize (default: 5)",
|
||||
)
|
||||
opt_parser.add_argument(
|
||||
"--iterations",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Number of optimization iterations (default: 10)",
|
||||
)
|
||||
opt_parser.add_argument(
|
||||
"--variants-per-iter",
|
||||
type=int,
|
||||
default=2,
|
||||
help="Variants generated per iteration (default: 2)",
|
||||
)
|
||||
opt_parser.add_argument(
|
||||
"--file",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to moments JSON fixture file",
|
||||
)
|
||||
opt_parser.add_argument(
|
||||
"--output-dir",
|
||||
type=str,
|
||||
default="backend/pipeline/quality/results/",
|
||||
help="Directory to write result JSON (default: backend/pipeline/quality/results/)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command is None:
|
||||
|
|
@ -67,6 +242,9 @@ def main() -> int:
|
|||
if args.command == "score":
|
||||
return _run_score(args)
|
||||
|
||||
if args.command == "optimize":
|
||||
return _run_optimize(args)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
|
|
@ -141,5 +319,70 @@ def _run_score(args: argparse.Namespace) -> int:
|
|||
return 0
|
||||
|
||||
|
||||
def _run_optimize(args: argparse.Namespace) -> int:
|
||||
"""Execute the optimize subcommand."""
|
||||
# Stage validation — only stage 5 is supported
|
||||
if args.stage != 5:
|
||||
print(
|
||||
f"Error: only stage 5 is supported for optimization (got stage {args.stage})",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
# Validate fixture file exists
|
||||
fixture = Path(args.file)
|
||||
if not fixture.exists():
|
||||
print(f"Error: fixture file not found: {args.file}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Ensure output dir
|
||||
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
settings = get_settings()
|
||||
client = LLMClient(settings)
|
||||
|
||||
loop = OptimizationLoop(
|
||||
client=client,
|
||||
stage=args.stage,
|
||||
fixture_path=args.file,
|
||||
iterations=args.iterations,
|
||||
variants_per_iter=args.variants_per_iter,
|
||||
)
|
||||
|
||||
try:
|
||||
result = loop.run()
|
||||
except KeyboardInterrupt:
|
||||
print("\n Optimization interrupted by user.", file=sys.stderr)
|
||||
return 130
|
||||
except Exception as exc:
|
||||
print(f"\nError: optimization failed: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# If the loop returned an error on baseline, report and exit
|
||||
if result.best_score.error and not result.history:
|
||||
print(f"\nError: {result.best_score.error}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Reporting
|
||||
print_leaderboard(result)
|
||||
print_trajectory(result)
|
||||
|
||||
# Write results JSON
|
||||
try:
|
||||
json_path = write_results_json(
|
||||
result=result,
|
||||
output_dir=args.output_dir,
|
||||
stage=args.stage,
|
||||
iterations=args.iterations,
|
||||
variants_per_iter=args.variants_per_iter,
|
||||
fixture_path=args.file,
|
||||
)
|
||||
print(f" Results written to: {json_path}")
|
||||
except OSError as exc:
|
||||
print(f" Warning: failed to write results JSON: {exc}", file=sys.stderr)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Module entry point: propagate main()'s return value as the exit status.
    raise SystemExit(main())
|
||||
|
|
|
|||
0
backend/pipeline/quality/results/.gitkeep
Normal file
0
backend/pipeline/quality/results/.gitkeep
Normal file
Loading…
Add table
Reference in a new issue