promptlooper/backend/tests/test_export.py
John Lightner e42117c8ee MAESTRO: Implement export router with JSON, .env, YAML, and markdown report endpoints
Four fully authenticated endpoints at /api/export/experiments/{id}/:
- /best: Returns best config as JSON with weighted score and metadata
- /env: Flattened KEY=VALUE format with metadata comments
- /yaml: Simple YAML serialization (no external dependency)
- /report: Full markdown report with config space, top N configs,
  score distributions, token usage, and timing stats

34 tests in test_export.py covering all endpoints, auth, 404s, and helpers.
Updated test_routers.py to expect 401 (auth required) instead of 501 (stub).
2026-04-07 03:30:45 -05:00

393 lines
15 KiB
Python

"""Tests for backend/routers/export.py — Export best config (JSON, .env, YAML) and report."""
import json
import os
import uuid
from datetime import datetime, timezone
from unittest.mock import patch
import pytest
from fastapi.testclient import TestClient
# Secret exported as the JWT_SECRET environment variable by the
# ``_isolate_settings`` fixture.
JWT_SECRET = "test-secret-key-for-jwt-signing"
# Exported as the API_KEY environment variable by ``_isolate_settings`` and
# sent as the ``X-Api-Key`` header by the ``auth_header`` fixture.
API_KEY = "test-api-key-12345"
@pytest.fixture(autouse=True)
def _isolate_settings(tmp_path):
    """Run every test against a temp SQLite file with an empty REDIS_URL.

    The environment is patched first; a fresh ``config.Settings`` is then
    built and assigned to the ``settings`` attribute of ``config``, ``main``
    and ``auth``. ``main._init_db()`` / ``main._init_redis()`` are called and
    all model tables are created on ``main.engine`` before the test runs.
    """
    sqlite_file = tmp_path / 'test.db'
    overrides = {
        "DATABASE_URL": f"sqlite:///{sqlite_file}",
        "REDIS_URL": "",
        "DATA_DIR": str(tmp_path),
        "JWT_SECRET": JWT_SECRET,
        "API_KEY": API_KEY,
    }
    with patch.dict(os.environ, overrides, clear=False):
        # Project modules are imported only once the environment is patched.
        import config

        fresh_settings = config.Settings(_env_file=None)
        config.settings = fresh_settings

        import main

        main.settings = fresh_settings
        main._init_db()
        main._init_redis()

        from models import Base

        Base.metadata.create_all(bind=main.engine)

        import auth

        auth.settings = fresh_settings

        # The test body executes while the patched environment is active.
        yield
@pytest.fixture
def db_session():
    """Yield a session taken from ``main.get_db`` and finish the generator afterwards."""
    from main import get_db

    session_gen = get_db()
    yield next(session_gen)
    # Resume the generator once more so the code after its ``yield`` runs;
    # the default argument absorbs the StopIteration raised on exhaustion.
    next(session_gen, None)
@pytest.fixture
def admin_user(db_session):
    """Persist and return a ``User`` named "admin" with ``is_admin=True``."""
    from auth import hash_password
    from models import User

    hashed = hash_password("adminpass")
    admin = User(username="admin", password_hash=hashed, is_admin=True)
    db_session.add(admin)
    db_session.commit()
    # Reload the row after the commit before handing it to the test.
    db_session.refresh(admin)
    return admin
@pytest.fixture
def project(db_session, admin_user):
    """Persist and return a ``Project`` owned by ``admin_user``."""
    from models import Project

    test_project = Project(
        name="Test Project",
        description="A test project",
        owner_id=admin_user.id,
    )
    db_session.add(test_project)
    db_session.commit()
    db_session.refresh(test_project)
    return test_project
@pytest.fixture
def experiment(db_session, project):
    """Persist and return an ``Experiment`` attached to ``project``.

    The scoring weights are accuracy 0.7 / fluency 0.3, and the parameter
    space covers three temperatures and two models.
    """
    from models import Experiment

    scoring = {"weights": {"accuracy": 0.7, "fluency": 0.3}}
    space = {"temperature": [0.1, 0.5, 0.9], "model": ["gpt-4", "gpt-3.5"]}
    test_experiment = Experiment(
        name="Test Experiment",
        description="An experiment for testing exports",
        project_id=project.id,
        scoring_config=scoring,
        parameter_space=space,
    )
    db_session.add(test_experiment)
    db_session.commit()
    db_session.refresh(test_experiment)
    return test_experiment
@pytest.fixture
def completed_runs(db_session, experiment):
    """Insert three completed runs, each with an accuracy and a fluency score.

    Run ``i`` gets ``config_hash`` ``hash_00i``, a duration of
    ``1000 + 500*i`` ms, ``100 + 50*i`` input tokens and ``200 + 100*i``
    output tokens. Returns the refreshed ``Run`` objects in insertion order.
    """
    from models import Run, RunStatus, Score

    # One (config, scorer -> value) pair per run, in insertion order.
    run_specs = [
        ({"model": "gpt-4", "temperature": 0.1}, {"accuracy": 0.95, "fluency": 0.80}),
        ({"model": "gpt-4", "temperature": 0.5}, {"accuracy": 0.85, "fluency": 0.90}),
        ({"model": "gpt-3.5", "temperature": 0.9}, {"accuracy": 0.70, "fluency": 0.60}),
    ]

    created = []
    for index, (run_config, scorer_values) in enumerate(run_specs):
        run = Run(
            experiment_id=experiment.id,
            config=run_config,
            config_hash=f"hash_{index:03d}",
            status=RunStatus.completed,
            duration_ms=1000 + index * 500,
            tokens_in=100 + index * 50,
            tokens_out=200 + index * 100,
        )
        db_session.add(run)
        # Flush before ``run.id`` is read for the Score rows below.
        db_session.flush()
        for scorer_name, value in scorer_values.items():
            db_session.add(Score(run_id=run.id, scorer_name=scorer_name, value=value))
        created.append(run)

    db_session.commit()
    for run in created:
        db_session.refresh(run)
    return created
@pytest.fixture
def auth_header():
    """Return request headers carrying the test API key as ``X-Api-Key``."""
    headers = {"X-Api-Key": API_KEY}
    return headers
@pytest.fixture
def client():
    """Return a ``TestClient`` wrapping the application object from ``main``."""
    import main

    return TestClient(main.app)
# ---------------------------------------------------------------------------
# Export Best — JSON
# ---------------------------------------------------------------------------
class TestExportBest:
    """Tests for ``GET /api/export/experiments/{id}/best`` (best config as JSON)."""

    def test_returns_best_config_json(self, client, auth_header, experiment, completed_runs):
        """The best run's config and its metadata fields are returned."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best", headers=auth_header)
        assert resp.status_code == 200
        data = resp.json()
        assert data["experiment_name"] == "Test Experiment"
        assert data["config"]["model"] == "gpt-4"
        assert data["config"]["temperature"] == 0.1
        assert data["weighted_score"] > 0
        assert "run_id" in data
        assert "config_hash" in data
        assert "exported_at" in data

    def test_best_uses_weighted_scores(self, client, auth_header, experiment, completed_runs):
        """Run 0 has accuracy=0.95, fluency=0.80. With weights 0.7/0.3, score = (0.95*0.7 + 0.80*0.3)/1.0 = 0.905."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best", headers=auth_header)
        # Check the status first so a failed request is reported as such
        # rather than as a KeyError / decode error on the body.
        assert resp.status_code == 200
        data = resp.json()
        assert data["weighted_score"] == pytest.approx(0.905, abs=0.001)

    def test_best_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        fake_id = uuid.uuid4()
        resp = client.get(f"/api/export/experiments/{fake_id}/best", headers=auth_header)
        assert resp.status_code == 404

    def test_best_404_no_completed_runs(self, client, auth_header, experiment):
        """An experiment without completed runs yields 404 with an explanatory detail."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best", headers=auth_header)
        assert resp.status_code == 404
        assert "No completed runs" in resp.json()["detail"]

    def test_best_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best")
        assert resp.status_code in (401, 403)
# ---------------------------------------------------------------------------
# Export Best — .env
# ---------------------------------------------------------------------------
class TestExportEnv:
    """Tests for ``GET /api/export/experiments/{id}/env`` (KEY=VALUE export)."""

    @staticmethod
    def _env_url(experiment_id):
        """Build the ``/env`` export path for ``experiment_id``."""
        return f"/api/export/experiments/{experiment_id}/env"

    def test_returns_env_format(self, client, auth_header, experiment, completed_runs):
        """The response is plain text containing the flattened config keys."""
        response = client.get(self._env_url(experiment.id), headers=auth_header)
        assert response.status_code == 200
        assert response.headers["content-type"] == "text/plain; charset=utf-8"
        body = response.text
        for fragment in ("# PromptLooper", "MODEL=", "TEMPERATURE="):
            assert fragment in body

    def test_env_has_metadata_comments(self, client, auth_header, experiment, completed_runs):
        """The experiment name and weighted score appear in the output."""
        body = client.get(self._env_url(experiment.id), headers=auth_header).text
        assert "Test Experiment" in body
        assert "Weighted score" in body

    def test_env_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        missing_id = uuid.uuid4()
        response = client.get(self._env_url(missing_id), headers=auth_header)
        assert response.status_code == 404

    def test_env_404_no_runs(self, client, auth_header, experiment):
        """An experiment without runs yields 404."""
        response = client.get(self._env_url(experiment.id), headers=auth_header)
        assert response.status_code == 404

    def test_env_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        response = client.get(self._env_url(experiment.id))
        assert response.status_code in (401, 403)
# ---------------------------------------------------------------------------
# Export Best — YAML
# ---------------------------------------------------------------------------
class TestExportYaml:
    """Tests for ``GET /api/export/experiments/{id}/yaml`` (YAML export)."""

    @staticmethod
    def _yaml_url(experiment_id):
        """Build the ``/yaml`` export path for ``experiment_id``."""
        return f"/api/export/experiments/{experiment_id}/yaml"

    def test_returns_yaml_format(self, client, auth_header, experiment, completed_runs):
        """The response is served as YAML and contains the top-level keys."""
        response = client.get(self._yaml_url(experiment.id), headers=auth_header)
        assert response.status_code == 200
        assert "text/yaml" in response.headers["content-type"]
        body = response.text
        assert "experiment_name: Test Experiment" in body
        assert "config:" in body

    def test_yaml_has_metadata_comments(self, client, auth_header, experiment, completed_runs):
        """Header comments with the tool name and weighted score are present."""
        body = client.get(self._yaml_url(experiment.id), headers=auth_header).text
        for fragment in ("# PromptLooper", "# Weighted score"):
            assert fragment in body

    def test_yaml_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        missing_id = uuid.uuid4()
        response = client.get(self._yaml_url(missing_id), headers=auth_header)
        assert response.status_code == 404

    def test_yaml_404_no_runs(self, client, auth_header, experiment):
        """An experiment without runs yields 404."""
        response = client.get(self._yaml_url(experiment.id), headers=auth_header)
        assert response.status_code == 404

    def test_yaml_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        response = client.get(self._yaml_url(experiment.id))
        assert response.status_code in (401, 403)
# ---------------------------------------------------------------------------
# Export Report — Markdown
# ---------------------------------------------------------------------------
class TestExportReport:
    """Tests for ``GET /api/export/experiments/{id}/report`` (markdown report)."""

    @staticmethod
    def _fetch_report(client, headers, experiment_id, query=""):
        """GET the report for ``experiment_id`` and return the successful response.

        Asserts a 200 status (with the response body in the failure message)
        so that content checks never run against an error payload.
        ``query`` is appended verbatim to the path, e.g. ``"?top_n=2"``.
        """
        resp = client.get(
            f"/api/export/experiments/{experiment_id}/report{query}",
            headers=headers,
        )
        assert resp.status_code == 200, resp.text
        return resp

    def test_returns_markdown_report(self, client, auth_header, experiment, completed_runs):
        """The report is served as markdown with the experiment title."""
        resp = self._fetch_report(client, auth_header, experiment.id)
        assert "text/markdown" in resp.headers["content-type"]
        content = resp.text
        assert "# Experiment Report: Test Experiment" in content

    def test_report_contains_config_space(self, client, auth_header, experiment, completed_runs):
        """The configuration-space section lists the parameters."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "## Configuration Space" in content
        assert "temperature" in content

    def test_report_contains_top_configs(self, client, auth_header, experiment, completed_runs):
        """The top-configurations section is present."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "## Top" in content
        assert "Weighted Score" in content

    def test_report_contains_score_distributions(self, client, auth_header, experiment, completed_runs):
        """Both scorers appear in the score-distribution section."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "## Score Distributions" in content
        assert "accuracy" in content
        assert "fluency" in content

    def test_report_contains_token_usage(self, client, auth_header, experiment, completed_runs):
        """The token-usage section is present."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "## Token Usage" in content
        assert "Total tokens in" in content

    def test_report_contains_timing(self, client, auth_header, experiment, completed_runs):
        """The timing section is present."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "## Timing" in content
        assert "Fastest run" in content

    def test_report_run_summary(self, client, auth_header, experiment, completed_runs):
        """The run-summary section is present."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "## Run Summary" in content
        assert "Total runs" in content
        assert "Completed" in content

    def test_report_custom_top_n(self, client, auth_header, experiment, completed_runs):
        """``top_n`` controls the heading of the top-configurations section."""
        content = self._fetch_report(client, auth_header, experiment.id, "?top_n=2").text
        assert "## Top 2 Configurations" in content

    def test_report_empty_experiment(self, client, auth_header, experiment):
        """Report should work even with no runs."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "Total runs | 0" in content
        assert "_No scored runs available._" in content

    def test_report_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        fake_id = uuid.uuid4()
        resp = client.get(f"/api/export/experiments/{fake_id}/report", headers=auth_header)
        assert resp.status_code == 404

    def test_report_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/report")
        assert resp.status_code in (401, 403)

    def test_report_with_failed_runs(self, client, auth_header, experiment, completed_runs, db_session):
        """Report should count failed runs separately."""
        from models import Run, RunStatus

        failed = Run(
            experiment_id=experiment.id,
            config={"model": "bad", "temperature": 0.5},
            config_hash="hash_fail",
            status=RunStatus.failed,
        )
        db_session.add(failed)
        db_session.commit()
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "Total runs | 4" in content
        assert "Failed | 1" in content

    def test_report_description_shown(self, client, auth_header, experiment, completed_runs):
        """The experiment description is included in the report."""
        content = self._fetch_report(client, auth_header, experiment.id).text
        assert "An experiment for testing exports" in content
# ---------------------------------------------------------------------------
# Helper function tests
# ---------------------------------------------------------------------------
class TestHelpers:
    """Unit tests for the ``_flatten_dict`` and ``_dict_to_yaml`` helpers of ``routers.export``."""

    def test_flatten_dict_simple(self):
        """Flat keys are upper-cased and values are stringified."""
        from routers.export import _flatten_dict

        expected = {"MODEL": "gpt-4", "TEMPERATURE": "0.5"}
        assert _flatten_dict({"model": "gpt-4", "temperature": 0.5}) == expected

    def test_flatten_dict_nested(self):
        """Nested keys are joined with an underscore."""
        from routers.export import _flatten_dict

        expected = {"LLM_MODEL": "gpt-4", "LLM_TEMP": "0.1"}
        assert _flatten_dict({"llm": {"model": "gpt-4", "temp": 0.1}}) == expected

    def test_flatten_dict_list(self):
        """A list value is rendered as a JSON-style string."""
        from routers.export import _flatten_dict

        expected = {"TAGS": '["a", "b"]'}
        assert _flatten_dict({"tags": ["a", "b"]}) == expected

    def test_dict_to_yaml_simple(self):
        """Scalar entries are emitted as ``key: value`` lines."""
        from routers.export import _dict_to_yaml

        rendered = _dict_to_yaml({"name": "test", "value": 42})
        for fragment in ("name: test", "value: 42"):
            assert fragment in rendered

    def test_dict_to_yaml_nested(self):
        """A nested mapping is emitted under its parent key."""
        from routers.export import _dict_to_yaml

        rendered = _dict_to_yaml({"config": {"model": "gpt-4"}})
        for fragment in ("config:", " model: gpt-4"):
            assert fragment in rendered

    def test_dict_to_yaml_bool_and_none(self):
        """Booleans and ``None`` are emitted as ``true`` / ``false`` / ``null``."""
        from routers.export import _dict_to_yaml

        rendered = _dict_to_yaml({"enabled": True, "disabled": False, "empty": None})
        for fragment in ("enabled: true", "disabled: false", "empty: null"):
            assert fragment in rendered