MAESTRO: Implement Experiment Builder page with all six sections and comprehensive tests

Build the full Experiment Builder (ExperimentPage.tsx) with: basic info form,
sample data input (text/JSON/file upload), pipeline stage builder with template
variables and preview, scoring configuration with enable toggles and weight
sliders, parameter space definition (fixed/range/options types), and action
buttons (Save Draft, Run Single, Start Sweep). Supports both creating new
experiments and editing existing ones. 20 tests added.
This commit is contained in:
John Lightner 2026-04-07 02:52:52 -05:00
parent d607970f0c
commit e8ce2f016b
3 changed files with 1585 additions and 5 deletions

View file

@ -67,7 +67,11 @@ describe("App routing", () => {
/**
 * Routing smoke test: `/experiments/:id` must mount ExperimentPage.
 * The page immediately tries to load the experiment, so depending on timing
 * it shows either the loading indicator or an error alert.
 */
it("renders ExperimentPage at /experiments/:id", async () => {
  renderWithRouter("/experiments/abc-123");
  await waitFor(() => {
    // Page attempts to load experiment — may show error or loading state.
    // `query*` returns null instead of throwing, so the alert fallback is
    // actually reachable; `getByText(...) || ...` throws before the
    // right-hand side is ever evaluated.
    expect(
      screen.queryByText("Loading experiment…") ?? screen.queryByRole("alert"),
    ).toBeInTheDocument();
  });
});

View file

@ -0,0 +1,493 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { MemoryRouter, Route, Routes } from "react-router-dom";
import { describe, it, expect, vi, beforeEach } from "vitest";
import ExperimentPage from "./ExperimentPage";
import * as client from "../api/client";
// ---------------------------------------------------------------------------
// Mocks
// ---------------------------------------------------------------------------
// Shared spy handed out by the mocked `useNavigate`, so tests can assert on
// the navigation calls the page makes.
const mockNavigate = vi.fn();
// Partially mock react-router-dom: every export of the real module is kept
// (loaded through `vi.importActual`) except `useNavigate`, which is replaced
// by a hook that returns `mockNavigate`.
vi.mock("react-router-dom", async () => {
const actual = await vi.importActual("react-router-dom");
return {
...actual,
// `mockNavigate` is read when the hook is called, not when this factory runs.
useNavigate: () => mockNavigate,
};
});
/**
 * Fixture: a saved draft experiment with one pipeline stage and no scoring
 * configuration or parameter space. Returned by the stubbed
 * `client.experiments.get` / `update` / `create` calls in the tests below.
 */
const MOCK_EXPERIMENT: client.ExperimentResponse = {
  id: "exp-1",
  project_id: "proj-1",
  name: "Test Experiment",
  description: "A test experiment",
  sample_data: { text: "Hello world" },
  pipeline_stages: {
    stages: [
      {
        id: "s1",
        prompt_template: "Summarize: {{ input_data }}",
        model: "gpt-4",
        endpoint_id: "ep-1",
        parameters: {},
      },
    ],
  },
  scoring_config: null,
  parameter_space: null,
  status: "draft",
  created_at: "2026-04-01T10:00:00Z",
  updated_at: "2026-04-07T08:00:00Z",
};
/**
 * Fixture: the two endpoints returned by the stubbed `client.endpoints.list`.
 * `ep-1` is the endpoint referenced by the stage in MOCK_EXPERIMENT.
 */
const MOCK_ENDPOINTS: client.EndpointResponse[] = [
  {
    id: "ep-1",
    name: "Local vLLM",
    url: "http://localhost:8080",
    default_model: "gpt-4",
  },
  {
    id: "ep-2",
    name: "OpenAI",
    url: "https://api.openai.com",
    default_model: "gpt-3.5-turbo",
  },
];
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Mounts ExperimentPage at `/experiments/new` inside an in-memory router,
 * i.e. the "create a new experiment" variant of the page.
 */
function renderNew() {
  const routeTree = (
    <MemoryRouter initialEntries={["/experiments/new"]}>
      <Routes>
        <Route path="/experiments/:id" element={<ExperimentPage />} />
      </Routes>
    </MemoryRouter>
  );
  return render(routeTree);
}
/**
 * Mounts ExperimentPage at `/experiments/<id>` inside an in-memory router,
 * i.e. the "edit an existing experiment" variant of the page.
 *
 * @param id Experiment id placed in the URL; defaults to "exp-1".
 */
function renderExisting(id = "exp-1") {
  const initialPath = `/experiments/${id}`;
  const routeTree = (
    <MemoryRouter initialEntries={[initialPath]}>
      <Routes>
        <Route path="/experiments/:id" element={<ExperimentPage />} />
      </Routes>
    </MemoryRouter>
  );
  return render(routeTree);
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
/**
 * Behavioural tests for the Experiment Builder page.
 *
 * Covers: new vs. existing experiment rendering, load/error/retry states,
 * sample data modes, pipeline stage editing and preview, scorer toggles,
 * parameter space editing, and the Save Draft / Start Sweep / Back actions.
 *
 * Effects that happen only after a mocked API promise settles (navigation,
 * status messages) are asserted inside their own `waitFor`, so the tests do
 * not depend on how quickly the component's continuation runs.
 */
describe("ExperimentPage", () => {
  beforeEach(() => {
    // Drop spies installed by the previous test and clear recorded navigations.
    vi.restoreAllMocks();
    mockNavigate.mockReset();
    // Default stubs
    vi.spyOn(client.endpoints, "list").mockResolvedValue({
      items: MOCK_ENDPOINTS,
      total: 2,
    });
  });

  // ---- New experiment ---------------------------------------------------
  it("renders new experiment form without loading data", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("New Experiment")).toBeInTheDocument();
    });
    expect(screen.getByLabelText("Name")).toHaveValue("");
    expect(screen.getByLabelText("Description")).toHaveValue("");
  });

  it("shows all six sections for new experiment", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Basic Info")).toBeInTheDocument();
    });
    expect(screen.getByText("Sample Data")).toBeInTheDocument();
    expect(screen.getByText("Pipeline Stages")).toBeInTheDocument();
    expect(screen.getByText("Scoring Configuration")).toBeInTheDocument();
    expect(screen.getByText("Parameter Space")).toBeInTheDocument();
    // The sixth section is the action bar.
    expect(screen.getByRole("button", { name: "Save Draft" })).toBeInTheDocument();
    expect(screen.getByRole("button", { name: "Run Single" })).toBeInTheDocument();
    expect(screen.getByRole("button", { name: "Start Sweep" })).toBeInTheDocument();
  });

  // ---- Loading existing experiment --------------------------------------
  it("loads and populates existing experiment data", async () => {
    vi.spyOn(client.experiments, "get").mockResolvedValue(MOCK_EXPERIMENT);
    renderExisting();
    await waitFor(() => {
      expect(screen.getByText("Edit Experiment")).toBeInTheDocument();
    });
    expect(screen.getByLabelText("Name")).toHaveValue("Test Experiment");
    expect(screen.getByLabelText("Description")).toHaveValue(
      "A test experiment",
    );
  });

  it("shows loading state while fetching", () => {
    // A promise that never settles keeps the page in its loading state.
    vi.spyOn(client.experiments, "get").mockImplementation(
      () => new Promise(() => {}),
    );
    renderExisting();
    expect(screen.getByText("Loading experiment…")).toBeInTheDocument();
  });

  it("shows error state on load failure", async () => {
    vi.spyOn(client.experiments, "get").mockRejectedValue(
      new client.ApiError(404, "Not Found", { detail: "Not found" }),
    );
    renderExisting();
    await waitFor(() => {
      expect(screen.getByRole("alert")).toHaveTextContent(
        "Failed to load experiment (404)",
      );
    });
  });

  it("shows network error on fetch failure", async () => {
    vi.spyOn(client.experiments, "get").mockRejectedValue(
      new Error("fetch failed"),
    );
    renderExisting();
    await waitFor(() => {
      expect(screen.getByRole("alert")).toHaveTextContent("Network error");
    });
  });

  it("retries loading on retry button click", async () => {
    // First load fails, the retry succeeds.
    const getSpy = vi
      .spyOn(client.experiments, "get")
      .mockRejectedValueOnce(new Error("fail"))
      .mockResolvedValueOnce(MOCK_EXPERIMENT);
    renderExisting();
    await waitFor(() => {
      expect(screen.getByText("Retry")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByText("Retry"));
    await waitFor(() => {
      expect(screen.getByLabelText("Name")).toHaveValue("Test Experiment");
    });
    expect(getSpy).toHaveBeenCalledTimes(2);
  });

  // ---- Sample data section ----------------------------------------------
  it("switches between sample data modes", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Sample Data")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    // Default: text mode
    expect(screen.getByPlaceholderText("Paste your sample data here...")).toBeInTheDocument();
    // Switch to JSON
    await user.click(screen.getByRole("button", { name: "JSON" }));
    expect(screen.getByPlaceholderText('{"key": "value"}')).toBeInTheDocument();
    // Switch to file upload
    await user.click(screen.getByRole("button", { name: "Upload File" }));
    expect(screen.getByTestId("file-upload")).toBeInTheDocument();
  });

  it("shows JSON validation error for invalid JSON", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Sample Data")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "JSON" }));
    const textarea = screen.getByPlaceholderText('{"key": "value"}');
    await user.type(textarea, "not json");
    expect(screen.getByRole("alert")).toHaveTextContent("Invalid JSON");
  });

  // ---- Pipeline stages --------------------------------------------------
  it("adds and removes pipeline stages", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Pipeline Stages")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    // Initially empty
    expect(screen.getByText("No stages yet. Add a pipeline stage to get started.")).toBeInTheDocument();
    // Add a stage
    await user.click(screen.getByRole("button", { name: "Add Stage" }));
    expect(screen.getByTestId("pipeline-stage-0")).toBeInTheDocument();
    expect(screen.getByText("Stage 1")).toBeInTheDocument();
    // Add another
    await user.click(screen.getByRole("button", { name: "Add Stage" }));
    expect(screen.getByTestId("pipeline-stage-1")).toBeInTheDocument();
    expect(screen.getByText("Stage 2")).toBeInTheDocument();
    // Remove first stage; the remaining stage is renumbered to "Stage 1".
    await user.click(screen.getByRole("button", { name: "Remove stage 1" }));
    expect(screen.queryByText("Stage 2")).not.toBeInTheDocument();
    expect(screen.getByText("Stage 1")).toBeInTheDocument();
  });

  it("shows template variable hints in pipeline stage", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Pipeline Stages")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Add Stage" }));
    expect(screen.getByText("{{ input_data }}")).toBeInTheDocument();
    expect(screen.getByText("{{ previous_stage_output }}")).toBeInTheDocument();
  });

  it("shows preview of prompt template", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Pipeline Stages")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Add Stage" }));
    const stage = screen.getByTestId("pipeline-stage-0");
    const templateInput = within(stage).getByPlaceholderText(
      /Enter your prompt template/,
    );
    // user-event only treats `{` and `[` as special, so each literal `{` is
    // doubled while `}` is typed as-is. This types exactly
    // "Summarize: {{ input_data }}" (doubling `}` too would type four of them).
    await user.type(templateInput, "Summarize: {{{{ input_data }}");
    await user.click(within(stage).getByText("Preview"));
    expect(screen.getByTestId("stage-preview-0")).toHaveTextContent(
      "Summarize: [input_data]",
    );
  });

  // ---- Scoring configuration --------------------------------------------
  it("toggles scorers and shows weight slider", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Scoring Configuration")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    // All scorers should be visible
    expect(screen.getByText("length")).toBeInTheDocument();
    expect(screen.getByText("similarity")).toBeInTheDocument();
    expect(screen.getByText("llm_judge")).toBeInTheDocument();
    // Enable length scorer
    const lengthCheckbox = screen.getByRole("checkbox", { name: /length/i });
    expect(lengthCheckbox).not.toBeChecked();
    await user.click(lengthCheckbox);
    expect(lengthCheckbox).toBeChecked();
    // Weight slider should appear
    expect(screen.getByRole("slider", { name: /Weight for length/i })).toBeInTheDocument();
  });

  // ---- Parameter space --------------------------------------------------
  it("adds and removes parameters", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Parameter Space")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    expect(screen.getByText("No parameters defined. Add parameters to sweep over.")).toBeInTheDocument();
    await user.click(screen.getByRole("button", { name: "Add Parameter" }));
    expect(screen.getByTestId("parameter-0")).toBeInTheDocument();
    await user.click(screen.getByRole("button", { name: "Remove parameter 1" }));
    expect(screen.queryByTestId("parameter-0")).not.toBeInTheDocument();
  });

  it("switches parameter types", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("Parameter Space")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Add Parameter" }));
    const param = screen.getByTestId("parameter-0");
    // Default is fixed — shows fixed value input
    expect(
      within(param).getByRole("textbox", { name: /fixed value/i }),
    ).toBeInTheDocument();
    // Switch to range
    await user.click(within(param).getByRole("button", { name: "range" }));
    expect(
      within(param).getByRole("spinbutton", { name: /min/i }),
    ).toBeInTheDocument();
    expect(
      within(param).getByRole("spinbutton", { name: /max/i }),
    ).toBeInTheDocument();
    expect(
      within(param).getByRole("spinbutton", { name: /step/i }),
    ).toBeInTheDocument();
    // Switch to options
    await user.click(within(param).getByRole("button", { name: "options" }));
    expect(
      within(param).getByRole("textbox", { name: /options/i }),
    ).toBeInTheDocument();
  });

  // ---- Save / actions ---------------------------------------------------
  it("shows validation error when saving without a name", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("New Experiment")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Save Draft" }));
    expect(screen.getByRole("status")).toHaveTextContent(
      "Experiment name is required",
    );
  });

  it("creates a new experiment on Save Draft", async () => {
    const created: client.ExperimentResponse = {
      ...MOCK_EXPERIMENT,
      id: "exp-new",
    };
    vi.spyOn(client.experiments, "create").mockResolvedValue(created);
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("New Experiment")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.type(screen.getByLabelText("Name"), "My New Experiment");
    await user.click(screen.getByRole("button", { name: "Save Draft" }));
    await waitFor(() => {
      expect(client.experiments.create).toHaveBeenCalledWith(
        expect.objectContaining({ name: "My New Experiment" }),
      );
    });
    // Navigation happens only after the create promise resolves, so wait for
    // it instead of asserting as soon as the API call has been observed.
    await waitFor(() => {
      expect(mockNavigate).toHaveBeenCalledWith("/experiments/exp-new", {
        replace: true,
      });
    });
  });

  it("updates an existing experiment on Save Draft", async () => {
    vi.spyOn(client.experiments, "get").mockResolvedValue(MOCK_EXPERIMENT);
    vi.spyOn(client.experiments, "update").mockResolvedValue(MOCK_EXPERIMENT);
    renderExisting();
    await waitFor(() => {
      expect(screen.getByLabelText("Name")).toHaveValue("Test Experiment");
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Save Draft" }));
    await waitFor(() => {
      expect(client.experiments.update).toHaveBeenCalledWith(
        "exp-1",
        expect.objectContaining({ name: "Test Experiment" }),
      );
    });
    // The success message is rendered after the update promise resolves.
    await waitFor(() => {
      expect(screen.getByRole("status")).toHaveTextContent("Saved!");
    });
  });

  it("shows save error on API failure", async () => {
    vi.spyOn(client.experiments, "create").mockRejectedValue(
      new client.ApiError(400, "Bad Request", { detail: "Invalid config" }),
    );
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("New Experiment")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.type(screen.getByLabelText("Name"), "Test");
    await user.click(screen.getByRole("button", { name: "Save Draft" }));
    await waitFor(() => {
      expect(screen.getByRole("status")).toHaveTextContent(
        "Save failed: Invalid config",
      );
    });
  });

  it("starts a sweep and navigates to live page", async () => {
    vi.spyOn(client.experiments, "get").mockResolvedValue(MOCK_EXPERIMENT);
    vi.spyOn(client.experiments, "update").mockResolvedValue(MOCK_EXPERIMENT);
    vi.spyOn(client.experiments, "startSweep").mockResolvedValue(undefined as never);
    renderExisting();
    await waitFor(() => {
      expect(screen.getByLabelText("Name")).toHaveValue("Test Experiment");
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Start Sweep" }));
    await waitFor(() => {
      expect(client.experiments.startSweep).toHaveBeenCalledWith("exp-1");
    });
    // Navigation follows the resolved startSweep call; wait for it explicitly.
    await waitFor(() => {
      expect(mockNavigate).toHaveBeenCalledWith("/live/exp-1");
    });
  });

  it("navigates back when clicking Back button", async () => {
    renderNew();
    await waitFor(() => {
      expect(screen.getByText("New Experiment")).toBeInTheDocument();
    });
    const user = userEvent.setup();
    await user.click(screen.getByRole("button", { name: "Back" }));
    expect(mockNavigate).toHaveBeenCalledWith(-1);
  });
});

File diff suppressed because it is too large Load diff