import { render, screen, waitFor } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { MemoryRouter } from "react-router-dom";
import { describe, it, expect, vi, beforeEach } from "vitest";
import ComparePage, {
  computeLineDiff,
  computeConfigDiff,
} from "./ComparePage";
import * as client from "../api/client";

// ---------------------------------------------------------------------------
// Mocks
// ---------------------------------------------------------------------------

/** Stand-in function handed out by the mocked `useNavigate` hook. */
const mockNavigate = vi.fn();

// Keep every real react-router-dom export (MemoryRouter is still needed) and
// replace only `useNavigate`.
vi.mock("react-router-dom", async () => {
  const actual =
    await vi.importActual<typeof import("react-router-dom")>(
      "react-router-dom",
    );
  return {
    ...actual,
    useNavigate: () => mockNavigate,
  };
});

/** Two completed experiments in the same project, returned by `experiments.list`. */
const MOCK_EXPERIMENTS: client.ExperimentResponse[] = [
  {
    id: "exp-1",
    project_id: "proj-1",
    name: "Experiment Alpha",
    description: null,
    sample_data: null,
    pipeline_stages: null,
    scoring_config: null,
    parameter_space: null,
    status: "completed",
    created_at: "2026-04-01T10:00:00Z",
    updated_at: "2026-04-07T10:00:00Z",
  },
  {
    id: "exp-2",
    project_id: "proj-1",
    name: "Experiment Beta",
    description: null,
    sample_data: null,
    pipeline_stages: null,
    scoring_config: null,
    parameter_space: null,
    status: "completed",
    created_at: "2026-04-02T10:00:00Z",
    updated_at: "2026-04-07T12:00:00Z",
  },
];

/**
 * Two runs of `exp-1`, returned by `runs.list`. Their configs share `model`
 * and differ only in `temperature` (0.7 vs 0.3).
 */
const MOCK_RUNS: client.RunResponse[] = [
  {
    id: "run-1",
    experiment_id: "exp-1",
    config_hash: "abc12345deadbeef",
    config: { model: "gpt-4", temperature: 0.7 },
    status: "completed",
    started_at: "2026-04-07T10:01:00Z",
    completed_at: "2026-04-07T10:01:05Z",
    duration_ms: 5000,
    tokens_in: 100,
    tokens_out: 200,
    cost_estimate: 0.01,
  },
  {
    id: "run-2",
    experiment_id: "exp-1",
    config_hash: "def67890cafebabe",
    config: { model: "gpt-4", temperature: 0.3 },
    status: "completed",
    started_at: "2026-04-07T10:02:00Z",
    completed_at: "2026-04-07T10:02:03Z",
    duration_ms: 3000,
    tokens_in: 80,
    tokens_out: 150,
    cost_estimate: 0.008,
  },
];

/** Detail payload for `run-1`: one stage result plus coherence/relevance scores. */
const MOCK_RUN_DETAIL_1: client.RunDetailResponse = {
  ...MOCK_RUNS[0],
  stage_results: [
    {
      id: "sr-1",
      run_id: "run-1",
      stage_index: 0,
      prompt_sent: "Summarize the text.",
      response_raw: "This is the summary\nfrom run one.\nEnd.",
      model_used: "gpt-4",
      parameters: { temperature: 0.7 },
      tokens_in: 100,
      tokens_out: 200,
      latency_ms: 4500,
    },
  ],
  scores: [
    {
      id: "sc-1",
      run_id: "run-1",
      scorer_name: "coherence",
      value: 0.85,
      scorer_metadata: null,
      created_at: "2026-04-07T10:01:05Z",
    },
    {
      id: "sc-2",
      run_id: "run-1",
      scorer_name: "relevance",
      value: 0.72,
      scorer_metadata: null,
      created_at: "2026-04-07T10:01:05Z",
    },
  ],
};

/**
 * Detail payload for `run-2`. Its response shares only the final "End." line
 * with run-1's response; the first two lines differ.
 */
const MOCK_RUN_DETAIL_2: client.RunDetailResponse = {
  ...MOCK_RUNS[1],
  stage_results: [
    {
      id: "sr-2",
      run_id: "run-2",
      stage_index: 0,
      prompt_sent: "Summarize the text briefly.",
      response_raw: "This is a different summary\nfrom run two.\nEnd.",
      model_used: "gpt-4",
      parameters: { temperature: 0.3 },
      tokens_in: 80,
      tokens_out: 150,
      latency_ms: 2800,
    },
  ],
  scores: [
    {
      id: "sc-3",
      run_id: "run-2",
      scorer_name: "coherence",
      value: 0.91,
      scorer_metadata: null,
      created_at: "2026-04-07T10:02:03Z",
    },
    {
      id: "sc-4",
      run_id: "run-2",
      scorer_name: "relevance",
      value: 0.65,
      scorer_metadata: null,
      created_at: "2026-04-07T10:02:03Z",
    },
  ],
};

/**
 * Renders `ComparePage` inside a `MemoryRouter`.
 *
 * NOTE(review): the element passed to `render` was missing (`render( , )`),
 * which does not parse. It is rebuilt here from the `MemoryRouter` and
 * `ComparePage` imports; confirm no router props (e.g. `initialEntries`)
 * were intended.
 *
 * @returns The Testing Library render result.
 */
function renderCompare() {
  return render(
    <MemoryRouter>
      <ComparePage />
    </MemoryRouter>,
  );
}

/**
 * Installs the happy-path API stubs on `client`:
 * - `experiments.list` resolves with both mock experiments;
 * - `runs.list` resolves with both mock runs;
 * - `runs.get` resolves the matching detail for "run-1" / "run-2" and
 *   rejects with "Not found" for any other id;
 * - `runs.score` resolves with a `human_preference` score.
 */
function setupDefaultMocks(): void {
  vi.spyOn(client.experiments, "list").mockResolvedValue({
    items: MOCK_EXPERIMENTS,
    total: 2,
  });
  vi.spyOn(client.runs, "list").mockResolvedValue({
    items: MOCK_RUNS,
    total: 2,
  });
  vi.spyOn(client.runs, "get").mockImplementation(async (runId: string) => {
    if (runId === "run-1") return MOCK_RUN_DETAIL_1;
    if (runId === "run-2") return MOCK_RUN_DETAIL_2;
    throw new Error("Not found");
  });
  vi.spyOn(client.runs, "score").mockResolvedValue({
    id: "score-new",
    run_id: "run-1",
    scorer_name: "human_preference",
    value: 1.0,
    scorer_metadata: null,
    created_at: "2026-04-07T12:00:00Z",
  });
}

// ---------------------------------------------------------------------------
// Unit tests for diff helpers
// ---------------------------------------------------------------------------

describe("computeLineDiff", () => {
  it("returns same lines for identical strings", () => {
    const result = computeLineDiff("hello\nworld", "hello\nworld");
    expect(result).toEqual([
      { type: "same", text: "hello" },
      { type: "same", text: "world" },
    ]);
  });

  it("detects added lines", () => {
    const result = computeLineDiff("hello", "hello\nworld");
    expect(result).toEqual([
      { type: "same", text: "hello" },
      { type: "added", text: "world" },
    ]);
  });

  it("detects removed lines", () => {
    const result = computeLineDiff("hello\nworld", "hello");
    expect(result).toEqual([
      { type: "same", text: "hello" },
      { type: "removed", text: "world" },
    ]);
  });

  it("detects changed lines", () => {
    const result = computeLineDiff("hello\nfoo", "hello\nbar");
    expect(result.filter((d) => d.type === "same")).toHaveLength(1);
    expect(result.filter((d) => d.type === "removed")).toHaveLength(1);
    expect(result.filter((d) => d.type === "added")).toHaveLength(1);
  });

  it("handles empty strings", () => {
    const result = computeLineDiff("", "");
    expect(result).toEqual([{ type: "same", text: "" }]);
  });
});

describe("computeConfigDiff", () => {
  it("returns same for identical configs", () => {
    const result = computeConfigDiff({ a: 1, b: 2 }, { a: 1, b: 2 });
    expect(result.every((e) => e.type === "same")).toBe(true);
  });

  it("detects changed values", () => {
    const result = computeConfigDiff({ a: 1 }, { a: 2 });
    expect(result).toEqual([
      { key: "a", type: "changed", leftValue: 1, rightValue: 2 },
    ]);
  });

  it("detects added keys", () => {
    const result = computeConfigDiff({}, { a: 1 });
    expect(result).toEqual([{ key: "a", type: "added", rightValue: 1 }]);
  });

  it("detects removed keys", () => {
    const result = computeConfigDiff({ a: 1 }, {});
    expect(result).toEqual([{ key: "a", type: "removed", leftValue: 1 }]);
  });

  it("handles mixed changes", () => {
    const result = computeConfigDiff(
      { a: 1, b: 2, c: 3 },
      { a: 1, b: 99, d: 4 },
    );
    const types = result.map((e) => `${e.key}:${e.type}`);
    expect(types).toContain("a:same");
    expect(types).toContain("b:changed");
    expect(types).toContain("c:removed");
    expect(types).toContain("d:added");
  });
});

// ---------------------------------------------------------------------------
// Component tests
// ---------------------------------------------------------------------------

type User = ReturnType<typeof userEvent.setup>;
type Side = "left" | "right";

/**
 * Picks an experiment in the given column and waits until that column's run
 * dropdown holds more than its placeholder option.
 */
async function chooseExperiment(
  user: User,
  side: Side,
  experimentId: string,
): Promise<void> {
  await user.selectOptions(
    screen.getByTestId(`${side}-experiment-select`),
    experimentId,
  );
  await waitFor(() => {
    const runSelect = screen.getByTestId(
      `${side}-run-select`,
    ) as HTMLSelectElement;
    // Should have options beyond the placeholder
    expect(runSelect.options.length).toBeGreaterThan(1);
  });
}

/** Picks an experiment and then one of its runs in the given column. */
async function selectRun(
  user: User,
  side: Side,
  experimentId: string,
  runId: string,
): Promise<void> {
  await chooseExperiment(user, side, experimentId);
  await user.selectOptions(screen.getByTestId(`${side}-run-select`), runId);
}

/**
 * Shared arrangement for the comparison tests: installs the default API
 * stubs, renders the page, then selects run-1 on the left and run-2 on the
 * right (both from exp-1).
 *
 * @returns The user-event instance, for further interaction.
 */
async function renderWithBothRunsSelected(): Promise<User> {
  setupDefaultMocks();
  const user = userEvent.setup();
  renderCompare();
  await screen.findByTestId("left-experiment-select");
  await selectRun(user, "left", "exp-1", "run-1");
  await selectRun(user, "right", "exp-1", "run-2");
  return user;
}

describe("ComparePage", () => {
  beforeEach(() => {
    vi.restoreAllMocks();
    mockNavigate.mockReset();
  });

  it("shows loading state initially", () => {
    // A promise that never settles keeps the page in its loading state.
    vi.spyOn(client.experiments, "list").mockImplementation(
      () => new Promise(() => {}),
    );
    renderCompare();
    expect(screen.getByText("Loading experiments…")).toBeInTheDocument();
  });

  it("shows error state when experiments fail to load", async () => {
    vi.spyOn(client.experiments, "list").mockRejectedValue(
      new Error("Network error"),
    );
    renderCompare();
    await waitFor(() => {
      expect(screen.getByTestId("compare-error")).toHaveTextContent(
        "Network error",
      );
    });
  });

  it("renders experiment dropdowns after loading", async () => {
    setupDefaultMocks();
    renderCompare();
    expect(
      await screen.findByTestId("left-experiment-select"),
    ).toBeInTheDocument();
    expect(screen.getByTestId("right-experiment-select")).toBeInTheDocument();
  });

  it("shows empty state before runs are selected", async () => {
    setupDefaultMocks();
    renderCompare();
    expect(await screen.findByTestId("compare-empty")).toBeInTheDocument();
  });

  it("populates run dropdown when experiment is selected", async () => {
    setupDefaultMocks();
    const user = userEvent.setup();
    renderCompare();
    await screen.findByTestId("left-experiment-select");
    // chooseExperiment itself asserts that the run dropdown gets populated.
    await chooseExperiment(user, "left", "exp-1");
  });

  it("shows config diff when both runs selected", async () => {
    await renderWithBothRunsSelected();
    expect(await screen.findByTestId("config-diff")).toBeInTheDocument();
  });

  it("shows score comparison when both runs are selected", async () => {
    await renderWithBothRunsSelected();
    expect(await screen.findByTestId("score-comparison")).toBeInTheDocument();
    expect(screen.getByTestId("score-compare-coherence")).toBeInTheDocument();
    expect(screen.getByTestId("score-compare-relevance")).toBeInTheDocument();
  });

  it("shows response diff when both runs are selected", async () => {
    await renderWithBothRunsSelected();
    expect(await screen.findByTestId("response-diff")).toBeInTheDocument();
  });

  it("highlights changed config values", async () => {
    await renderWithBothRunsSelected();
    await screen.findByTestId("config-diff");

    // model is same, temperature changed (0.7 vs 0.3)
    const changedEntries = screen.getAllByTestId("config-diff-changed");
    expect(changedEntries.length).toBeGreaterThanOrEqual(1);
    const sameEntries = screen.getAllByTestId("config-diff-same");
    expect(sameEntries.length).toBeGreaterThanOrEqual(1);
  });

  it("submits human_preference score when picking a winner", async () => {
    const user = await renderWithBothRunsSelected();

    await user.click(await screen.findByTestId("pick-left"));

    // Assert on the spy installed by setupDefaultMocks instead of spying a
    // second time, so the test does not depend on how vi.spyOn treats an
    // already-spied method.
    await waitFor(() => {
      expect(client.runs.score).toHaveBeenCalledWith("run-1", {
        scorer_name: "human_preference",
        value: 1.0,
        metadata: { compared_against: "run-2", comparison_winner: true },
      });
    });
    await waitFor(() => {
      expect(screen.getByTestId("pick-left")).toHaveTextContent("Run A Wins");
    });
  });

  it("can pick Run B as winner", async () => {
    const user = await renderWithBothRunsSelected();

    await user.click(await screen.findByTestId("pick-right"));

    await waitFor(() => {
      expect(client.runs.score).toHaveBeenCalledWith("run-2", {
        scorer_name: "human_preference",
        value: 1.0,
        metadata: { compared_against: "run-1", comparison_winner: true },
      });
    });
    await waitFor(() => {
      expect(screen.getByTestId("pick-right")).toHaveTextContent("Run B Wins");
    });
  });

  it("renders run detail cards when both runs selected", async () => {
    await renderWithBothRunsSelected();
    await waitFor(() => {
      expect(screen.getAllByTestId("run-card")).toHaveLength(2);
    });
  });

  it("resets winner when changing run selection", async () => {
    const user = await renderWithBothRunsSelected();

    // Pick a winner
    await user.click(await screen.findByTestId("pick-left"));
    await waitFor(() => {
      expect(screen.getByTestId("pick-left")).toHaveTextContent("Run A Wins");
    });

    // Change left run — winner should reset
    await user.selectOptions(screen.getByTestId("left-run-select"), "run-2");
    await waitFor(() => {
      expect(screen.getByTestId("pick-left")).toHaveTextContent("Pick Run A");
    });
  });

  it("disables run select when no experiment is chosen", async () => {
    setupDefaultMocks();
    renderCompare();
    await screen.findByTestId("left-experiment-select");

    const leftRunSelect = screen.getByTestId(
      "left-run-select",
    ) as HTMLSelectElement;
    expect(leftRunSelect.disabled).toBe(true);
  });

  it("shows diff lines with correct types in response diff", async () => {
    await renderWithBothRunsSelected();
    await screen.findByTestId("response-diff");

    // The responses differ so we should have some diff lines
    const sameLines = screen.getAllByTestId("diff-line-same");
    expect(sameLines.length).toBeGreaterThan(0);
    // At least one removed and one added line expected
    const removedLines = screen.getAllByTestId("diff-line-removed");
    expect(removedLines.length).toBeGreaterThan(0);
    const addedLines = screen.getAllByTestId("diff-line-added");
    expect(addedLines.length).toBeGreaterThan(0);
  });

  it("renders page title", async () => {
    setupDefaultMocks();
    renderCompare();
    expect(await screen.findByText("Compare Runs")).toBeInTheDocument();
  });
});