Four fully authenticated endpoints at /api/export/experiments/{id}/:
- /best: Returns best config as JSON with weighted score and metadata
- /env: Flattened KEY=VALUE format with metadata comments
- /yaml: Simple YAML serialization (no external dependency)
- /report: Full markdown report with config space, top N configs,
  score distributions, token usage, and timing stats
34 tests in test_export.py covering all endpoints, auth, 404s, and helpers.
Updated test_routers.py to expect 401 (auth required) instead of 501 (stub).
393 lines
15 KiB
Python
393 lines
15 KiB
Python
"""Tests for backend/routers/export.py — Export best config (JSON, .env, YAML) and report."""
|
|
|
|
import json
|
|
import os
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
|
|
# Secret exported as the JWT_SECRET environment variable by the _isolate_settings fixture.
JWT_SECRET = "test-secret-key-for-jwt-signing"
|
|
# Key exported as the API_KEY environment variable by _isolate_settings and sent
# as the X-Api-Key request header by the auth_header fixture.
API_KEY = "test-api-key-12345"
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _isolate_settings(tmp_path):
    """Run each test against a throwaway SQLite file with an empty Redis URL.

    The environment is patched, a fresh ``Settings`` object is built from it,
    and that object is installed on the ``config``, ``main`` and ``auth``
    modules. The database/Redis initialisers in ``main`` are re-run and the
    model tables are created before control is handed to the test.
    """
    overrides = {
        "DATABASE_URL": f"sqlite:///{tmp_path / 'test.db'}",
        "REDIS_URL": "",
        "DATA_DIR": str(tmp_path),
        "JWT_SECRET": JWT_SECRET,
        "API_KEY": API_KEY,
    }
    # NOTE(review): nesting was reconstructed from a whitespace-stripped copy;
    # everything down to and including the yield is assumed to sit inside the
    # patch.dict block -- confirm against the original file.
    with patch.dict(os.environ, overrides, clear=False):
        # Project imports happen only after the environment has been patched.
        import config

        isolated = config.Settings(_env_file=None)
        config.settings = isolated

        import main

        main.settings = isolated
        main._init_db()
        main._init_redis()

        from models import Base

        Base.metadata.create_all(bind=main.engine)

        import auth

        auth.settings = isolated

        yield
|
|
|
|
|
|
@pytest.fixture
def db_session():
    """Hand the test the session produced by ``main.get_db``.

    ``get_db`` is driven by hand as a generator: its first item is yielded to
    the test, and afterwards the generator is advanced once more so the code
    following its own ``yield`` gets to run.
    """
    from main import get_db

    session_gen = get_db()
    yield next(session_gen)
    # The default argument swallows the StopIteration of an exhausted generator.
    next(session_gen, None)
|
|
|
|
|
|
@pytest.fixture
def admin_user(db_session):
    """Persist an admin ``User`` (username ``admin``) and return the refreshed row."""
    from auth import hash_password
    from models import User

    account = User(
        username="admin",
        password_hash=hash_password("adminpass"),
        is_admin=True,
    )
    db_session.add(account)
    db_session.commit()
    # Reload the instance after the commit before handing it to the test.
    db_session.refresh(account)
    return account
|
|
|
|
|
|
@pytest.fixture
def project(db_session, admin_user):
    """Persist a ``Project`` owned by ``admin_user`` and return the refreshed row."""
    from models import Project

    fields = {
        "name": "Test Project",
        "description": "A test project",
        "owner_id": admin_user.id,
    }
    record = Project(**fields)
    db_session.add(record)
    db_session.commit()
    db_session.refresh(record)
    return record
|
|
|
|
|
|
@pytest.fixture
def experiment(db_session, project):
    """Persist an ``Experiment`` under ``project`` and return the refreshed row.

    The experiment weights ``accuracy`` at 0.7 and ``fluency`` at 0.3 and
    declares a parameter space over ``temperature`` and ``model``.
    """
    from models import Experiment

    scoring = {"weights": {"accuracy": 0.7, "fluency": 0.3}}
    space = {"temperature": [0.1, 0.5, 0.9], "model": ["gpt-4", "gpt-3.5"]}

    record = Experiment(
        name="Test Experiment",
        description="An experiment for testing exports",
        project_id=project.id,
        scoring_config=scoring,
        parameter_space=space,
    )
    db_session.add(record)
    db_session.commit()
    db_session.refresh(record)
    return record
|
|
|
|
|
|
@pytest.fixture
def completed_runs(db_session, experiment):
    """Create 3 completed runs, each carrying an accuracy and a fluency score.

    Returns the refreshed ``Run`` objects in creation order. Duration and
    token counts grow with the run's position in the list.
    """
    from models import Run, RunStatus, Score

    # One (config, scores) pair per run; scores are (scorer_name, value) tuples.
    specs = [
        ({"model": "gpt-4", "temperature": 0.1}, [("accuracy", 0.95), ("fluency", 0.80)]),
        ({"model": "gpt-4", "temperature": 0.5}, [("accuracy", 0.85), ("fluency", 0.90)]),
        ({"model": "gpt-3.5", "temperature": 0.9}, [("accuracy", 0.70), ("fluency", 0.60)]),
    ]

    created = []
    for index, (run_config, run_scores) in enumerate(specs):
        run = Run(
            experiment_id=experiment.id,
            config=run_config,
            config_hash=f"hash_{index:03d}",
            status=RunStatus.completed,
            duration_ms=1000 + index * 500,
            tokens_in=100 + index * 50,
            tokens_out=200 + index * 100,
        )
        db_session.add(run)
        # Flushed before run.id is read below -- presumably so the key is populated.
        db_session.flush()

        for scorer_name, value in run_scores:
            db_session.add(Score(run_id=run.id, scorer_name=scorer_name, value=value))

        created.append(run)

    db_session.commit()
    for run in created:
        db_session.refresh(run)
    return created
|
|
|
|
|
|
@pytest.fixture
def auth_header():
    """Request headers that carry the test API key in ``X-Api-Key``."""
    headers = {"X-Api-Key": API_KEY}
    return headers
|
|
|
|
|
|
@pytest.fixture
def client():
    """Wrap the application object from ``main`` in a ``TestClient``."""
    import main

    return TestClient(main.app)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Export Best — JSON
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExportBest:
    """Tests for ``GET /api/export/experiments/{id}/best`` (best config as JSON)."""

    def test_returns_best_config_json(self, client, auth_header, experiment, completed_runs):
        """The payload names the experiment, the winning config and its metadata."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best", headers=auth_header)
        assert resp.status_code == 200
        data = resp.json()
        assert data["experiment_name"] == "Test Experiment"
        assert data["config"]["model"] == "gpt-4"
        assert data["config"]["temperature"] == 0.1
        assert data["weighted_score"] > 0
        assert "run_id" in data
        assert "config_hash" in data
        assert "exported_at" in data

    def test_best_uses_weighted_scores(self, client, auth_header, experiment, completed_runs):
        """Run 0 has accuracy=0.95, fluency=0.80. With weights 0.7/0.3, score = (0.95*0.7 + 0.80*0.3)/1.0 = 0.905."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best", headers=auth_header)
        # Check the status first so an error response fails here rather than
        # as a KeyError on the missing "weighted_score" field.
        assert resp.status_code == 200
        data = resp.json()
        assert data["weighted_score"] == pytest.approx(0.905, abs=0.001)

    def test_best_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        fake_id = uuid.uuid4()
        resp = client.get(f"/api/export/experiments/{fake_id}/best", headers=auth_header)
        assert resp.status_code == 404

    def test_best_404_no_completed_runs(self, client, auth_header, experiment):
        """An experiment without completed runs yields 404 with an explanatory detail."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best", headers=auth_header)
        assert resp.status_code == 404
        assert "No completed runs" in resp.json()["detail"]

    def test_best_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/best")
        assert resp.status_code in (401, 403)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Export Best — .env
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExportEnv:
    """Tests for ``GET /api/export/experiments/{id}/env`` (KEY=VALUE export)."""

    def test_returns_env_format(self, client, auth_header, experiment, completed_runs):
        """The response is plain text containing upper-cased config keys."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/env", headers=auth_header)
        assert resp.status_code == 200
        assert resp.headers["content-type"] == "text/plain; charset=utf-8"
        content = resp.text
        assert "# PromptLooper" in content
        assert "MODEL=" in content
        assert "TEMPERATURE=" in content

    def test_env_has_metadata_comments(self, client, auth_header, experiment, completed_runs):
        """The export mentions the experiment name and the weighted score."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/env", headers=auth_header)
        # Check the status first so an error body is not mistaken for an
        # export that merely lacks the expected comments.
        assert resp.status_code == 200
        content = resp.text
        assert "Test Experiment" in content
        assert "Weighted score" in content

    def test_env_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        fake_id = uuid.uuid4()
        resp = client.get(f"/api/export/experiments/{fake_id}/env", headers=auth_header)
        assert resp.status_code == 404

    def test_env_404_no_runs(self, client, auth_header, experiment):
        """An experiment without runs yields 404."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/env", headers=auth_header)
        assert resp.status_code == 404

    def test_env_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/env")
        assert resp.status_code in (401, 403)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Export Best — YAML
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExportYaml:
    """Tests for ``GET /api/export/experiments/{id}/yaml`` (YAML export)."""

    def test_returns_yaml_format(self, client, auth_header, experiment, completed_runs):
        """The response is served as YAML and contains the experiment name and config."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/yaml", headers=auth_header)
        assert resp.status_code == 200
        assert "text/yaml" in resp.headers["content-type"]
        content = resp.text
        assert "experiment_name: Test Experiment" in content
        assert "config:" in content

    def test_yaml_has_metadata_comments(self, client, auth_header, experiment, completed_runs):
        """The export carries the banner and weighted-score comment lines."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/yaml", headers=auth_header)
        # Check the status first so an error body is not mistaken for an
        # export that merely lacks the expected comments.
        assert resp.status_code == 200
        content = resp.text
        assert "# PromptLooper" in content
        assert "# Weighted score" in content

    def test_yaml_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        fake_id = uuid.uuid4()
        resp = client.get(f"/api/export/experiments/{fake_id}/yaml", headers=auth_header)
        assert resp.status_code == 404

    def test_yaml_404_no_runs(self, client, auth_header, experiment):
        """An experiment without runs yields 404."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/yaml", headers=auth_header)
        assert resp.status_code == 404

    def test_yaml_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/yaml")
        assert resp.status_code in (401, 403)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Export Report — Markdown
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExportReport:
    """Tests for ``GET /api/export/experiments/{id}/report`` (markdown report)."""

    @staticmethod
    def _report_text(client, auth_header, experiment):
        """Fetch the report for ``experiment``, assert it was served, return its text.

        Checking the status here means a failing endpoint is reported as a
        status mismatch instead of a substring miss against an error body.
        The leading underscore keeps pytest from collecting this helper.
        """
        resp = client.get(f"/api/export/experiments/{experiment.id}/report", headers=auth_header)
        assert resp.status_code == 200
        return resp.text

    def test_returns_markdown_report(self, client, auth_header, experiment, completed_runs):
        """The response is served as markdown and starts with the report title."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/report", headers=auth_header)
        assert resp.status_code == 200
        assert "text/markdown" in resp.headers["content-type"]
        content = resp.text
        assert "# Experiment Report: Test Experiment" in content

    def test_report_contains_config_space(self, client, auth_header, experiment, completed_runs):
        """The report has a configuration-space section listing the parameters."""
        content = self._report_text(client, auth_header, experiment)
        assert "## Configuration Space" in content
        assert "temperature" in content

    def test_report_contains_top_configs(self, client, auth_header, experiment, completed_runs):
        """The report has a top-configurations section with weighted scores."""
        content = self._report_text(client, auth_header, experiment)
        assert "## Top" in content
        assert "Weighted Score" in content

    def test_report_contains_score_distributions(self, client, auth_header, experiment, completed_runs):
        """The report has a score-distribution section naming both scorers."""
        content = self._report_text(client, auth_header, experiment)
        assert "## Score Distributions" in content
        assert "accuracy" in content
        assert "fluency" in content

    def test_report_contains_token_usage(self, client, auth_header, experiment, completed_runs):
        """The report has a token-usage section."""
        content = self._report_text(client, auth_header, experiment)
        assert "## Token Usage" in content
        assert "Total tokens in" in content

    def test_report_contains_timing(self, client, auth_header, experiment, completed_runs):
        """The report has a timing section."""
        content = self._report_text(client, auth_header, experiment)
        assert "## Timing" in content
        assert "Fastest run" in content

    def test_report_run_summary(self, client, auth_header, experiment, completed_runs):
        """The report has a run-summary section with total and completed counts."""
        content = self._report_text(client, auth_header, experiment)
        assert "## Run Summary" in content
        assert "Total runs" in content
        assert "Completed" in content

    def test_report_custom_top_n(self, client, auth_header, experiment, completed_runs):
        """The ``top_n`` query parameter is reflected in the section heading."""
        resp = client.get(
            f"/api/export/experiments/{experiment.id}/report?top_n=2",
            headers=auth_header,
        )
        assert resp.status_code == 200
        content = resp.text
        assert "## Top 2 Configurations" in content

    def test_report_empty_experiment(self, client, auth_header, experiment):
        """Report should work even with no runs."""
        content = self._report_text(client, auth_header, experiment)
        assert "Total runs | 0" in content
        assert "_No scored runs available._" in content

    def test_report_404_no_experiment(self, client, auth_header, admin_user):
        """An unknown experiment id yields 404."""
        fake_id = uuid.uuid4()
        resp = client.get(f"/api/export/experiments/{fake_id}/report", headers=auth_header)
        assert resp.status_code == 404

    def test_report_requires_auth(self, client, experiment):
        """A request without credentials is rejected with 401 or 403."""
        resp = client.get(f"/api/export/experiments/{experiment.id}/report")
        assert resp.status_code in (401, 403)

    def test_report_with_failed_runs(self, client, auth_header, experiment, completed_runs, db_session):
        """Report should count failed runs separately."""
        from models import Run, RunStatus

        failed = Run(
            experiment_id=experiment.id,
            config={"model": "bad", "temperature": 0.5},
            config_hash="hash_fail",
            status=RunStatus.failed,
        )
        db_session.add(failed)
        db_session.commit()

        content = self._report_text(client, auth_header, experiment)
        assert "Total runs | 4" in content
        assert "Failed | 1" in content

    def test_report_description_shown(self, client, auth_header, experiment, completed_runs):
        """The experiment description appears in the report."""
        content = self._report_text(client, auth_header, experiment)
        assert "An experiment for testing exports" in content
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helper function tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestHelpers:
    """Unit tests for the private ``_flatten_dict`` and ``_dict_to_yaml`` helpers."""

    def test_flatten_dict_simple(self):
        """Top-level keys are upper-cased and values are stringified."""
        from routers import export

        flat = export._flatten_dict({"model": "gpt-4", "temperature": 0.5})
        expected = {"MODEL": "gpt-4", "TEMPERATURE": "0.5"}
        assert flat == expected

    def test_flatten_dict_nested(self):
        """Nested keys are joined to their parent with an underscore."""
        from routers import export

        flat = export._flatten_dict({"llm": {"model": "gpt-4", "temp": 0.1}})
        expected = {"LLM_MODEL": "gpt-4", "LLM_TEMP": "0.1"}
        assert flat == expected

    def test_flatten_dict_list(self):
        """A list value is rendered as a JSON-style array string."""
        from routers import export

        flat = export._flatten_dict({"tags": ["a", "b"]})
        expected = {"TAGS": '["a", "b"]'}
        assert flat == expected

    def test_dict_to_yaml_simple(self):
        """Scalar entries are rendered as ``key: value`` lines."""
        from routers import export

        rendered = export._dict_to_yaml({"name": "test", "value": 42})
        for fragment in ("name: test", "value: 42"):
            assert fragment in rendered

    def test_dict_to_yaml_nested(self):
        """A nested mapping is rendered under its parent key."""
        from routers import export

        rendered = export._dict_to_yaml({"config": {"model": "gpt-4"}})
        for fragment in ("config:", " model: gpt-4"):
            assert fragment in rendered

    def test_dict_to_yaml_bool_and_none(self):
        """Booleans and ``None`` are rendered as ``true``/``false``/``null``."""
        from routers import export

        rendered = export._dict_to_yaml({"enabled": True, "disabled": False, "empty": None})
        for fragment in ("enabled: true", "disabled: false", "empty: null"):
            assert fragment in rendered
|