diff --git a/Auto Run Docs/02b-frontend-dashboard.md b/Auto Run Docs/02b-frontend-dashboard.md index 5aa4e50..6da4b2f 100644 --- a/Auto Run Docs/02b-frontend-dashboard.md +++ b/Auto Run Docs/02b-frontend-dashboard.md @@ -35,7 +35,8 @@ Build the React frontend: setup wizard, experiment builder, real-time observabil - [x] Build the Steering Controls component (frontend/src/components/SteeringControls.tsx). Buttons for: Pause (yellow, shows confirmation), Resume (green), Stop (red, shows confirmation), Fork (opens modal to create new experiment from current best), Export Best (dropdown: JSON/YAML/.env). Also show: progress bar (X of Y runs), token counter (running total), estimated cost, cache hit rate percentage, and estimated time remaining. -- [ ] Build the Run Card component (frontend/src/components/RunCard.tsx). Expandable card showing: config summary, all scores with visual bars, prompt sent (collapsible), raw response (collapsible with copy button), timing breakdown per stage, cache status badge. Used in both the leaderboard detail view and the Compare page. +- [x] Build the Run Card component (frontend/src/components/RunCard.tsx). Expandable card showing: config summary, all scores with visual bars, prompt sent (collapsible), raw response (collapsible with copy button), timing breakdown per stage, cache status badge. Used in both the leaderboard detail view and the Compare page. + - [ ] Implement the Compare page (frontend/src/pages/Compare.tsx). Side-by-side comparison of any two runs. Two columns, each with a run selector (dropdown or search). Show: config diff (highlight what changed), response diff (inline text diff with highlights), score comparison (bar chart overlay), and a "pick winner" button for human rating. diff --git a/frontend/src/components/RunCard.test.tsx b/frontend/src/components/RunCard.test.tsx new file mode 100644 index 0000000..62dbe25 --- /dev/null +++ b/frontend/src/components/RunCard.test.tsx @@ -0,0 +1,328 @@ +import { render, screen, within } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { describe, it, expect, vi, beforeEach } from "vitest"; +import RunCard from "./RunCard"; +import type { RunCardData, RunCardProps } from "./RunCard"; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +function makeStage(overrides: Partial = {}): import("./RunCard").RunCardStageResult { + return { + stage_index: 0, + prompt_sent: "Summarize this: {{ input_data }}", + response_raw: "This is a summary of the input.", + model_used: "gpt-4", + parameters: { temperature: 0.7 }, + tokens_in: 120, + tokens_out: 45, + latency_ms: 1250, + ...overrides, + }; +} + +function makeRun(overrides: Partial = {}): RunCardData { + return { + run_id: "run-abc-123", + config_summary: "gpt-4 t=0.7", + config: { model: "gpt-4", temperature: 0.7 }, + status: "completed", + cached: false, + duration_ms: 1500, + tokens_in: 120, + tokens_out: 45, + scores: [ + { scorer_name: "length", value: 0.85 }, + { scorer_name: "quality", value: 0.92 }, + ], + stage_results: [makeStage()], + ...overrides, + }; +} + +function renderCard(overrides: Partial = {}) { + const props: RunCardProps = { + run: makeRun(), + ...overrides, + }; + return { ...render(), props }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("RunCard", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + // ---- Basic rendering ---- + + it("renders the card with config summary and status badge", () => { + renderCard(); + expect(screen.getByTestId("run-card")).toBeInTheDocument(); + expect(screen.getByText("gpt-4 t=0.7")).toBeInTheDocument(); + expect(screen.getByText("completed")).toBeInTheDocument(); + }); + + it("shows duration in the header when provided", () => { + renderCard({ run: makeRun({ duration_ms: 2340 }) }); + expect(screen.getByText("2.34s")).toBeInTheDocument(); + }); + + it("does not show duration when null", () => { + renderCard({ run: makeRun({ duration_ms: null }) }); + expect(screen.queryByText(/\d+\.\d+s/)).not.toBeInTheDocument(); + }); + + // ---- Status badges ---- + + it("shows cached badge when run is cached", () => { + renderCard({ run: makeRun({ cached: true }) }); + expect(screen.getByText("cached")).toBeInTheDocument(); + }); + + it("shows running badge", () => { + renderCard({ run: makeRun({ status: "running", cached: false }) }); + expect(screen.getByText("running")).toBeInTheDocument(); + }); + + it("shows failed badge", () => { + renderCard({ run: makeRun({ status: "failed", cached: false }) }); + expect(screen.getByText("failed")).toBeInTheDocument(); + }); + + it("shows unknown status text", () => { + renderCard({ run: makeRun({ status: "queued", cached: false }) }); + expect(screen.getByText("queued")).toBeInTheDocument(); + }); + + // ---- Expand/collapse ---- + + it("is collapsed by default", () => { + renderCard(); + expect(screen.queryByTestId("run-card-detail")).not.toBeInTheDocument(); + }); + + it("expands when header is clicked", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByTestId("run-card-detail")).toBeInTheDocument(); + }); + + it("collapses when header is clicked again", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByTestId("run-card-detail")).toBeInTheDocument(); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.queryByTestId("run-card-detail")).not.toBeInTheDocument(); + }); + + it("respects defaultExpanded prop", () => { + renderCard({ defaultExpanded: true }); + expect(screen.getByTestId("run-card-detail")).toBeInTheDocument(); + }); + + // ---- Scores section ---- + + it("displays all scores with visual bars when expanded", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByText("length")).toBeInTheDocument(); + expect(screen.getByText("quality")).toBeInTheDocument(); + expect(screen.getByText("0.850")).toBeInTheDocument(); + expect(screen.getByText("0.920")).toBeInTheDocument(); + expect(screen.getByTestId("score-bar-length")).toBeInTheDocument(); + expect(screen.getByTestId("score-bar-quality")).toBeInTheDocument(); + }); + + it("score bar widths reflect values", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + const bar = screen.getByTestId("score-bar-length"); + expect(bar.style.width).toBe("85%"); + }); + + it("does not show scores heading when no scores", async () => { + const user = userEvent.setup(); + renderCard({ run: makeRun({ scores: [] }) }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.queryByText("Scores")).not.toBeInTheDocument(); + }); + + // ---- Config section ---- + + it("shows config JSON when expanded", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByText(/\"model\": \"gpt-4\"/)).toBeInTheDocument(); + }); + + // ---- Stage timing ---- + + it("shows stage timing breakdown", async () => { + const user = userEvent.setup(); + renderCard({ + run: makeRun({ + stage_results: [ + makeStage({ stage_index: 0, model_used: "gpt-4", latency_ms: 1250, tokens_in: 100, tokens_out: 50 }), + makeStage({ stage_index: 1, model_used: "gpt-3.5", latency_ms: 800, tokens_in: 60, tokens_out: 30 }), + ], + }), + }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByText("Stage 1")).toBeInTheDocument(); + expect(screen.getByText("Stage 2")).toBeInTheDocument(); + expect(screen.getByText("(gpt-4)")).toBeInTheDocument(); + expect(screen.getByText("(gpt-3.5)")).toBeInTheDocument(); + expect(screen.getByText("1.25s")).toBeInTheDocument(); + expect(screen.getByText("0.80s")).toBeInTheDocument(); + }); + + it("does not show stage timing when no stages", async () => { + const user = userEvent.setup(); + renderCard({ run: makeRun({ stage_results: [] }) }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.queryByText("Stage Timing")).not.toBeInTheDocument(); + }); + + // ---- Collapsible prompt ---- + + it("shows collapsible prompt section for each stage", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + const toggle = screen.getByTestId("prompt-stage-0-toggle"); + expect(toggle).toBeInTheDocument(); + expect(screen.queryByTestId("prompt-stage-0-content")).not.toBeInTheDocument(); + await user.click(toggle); + expect(screen.getByTestId("prompt-stage-0-content")).toBeInTheDocument(); + expect(screen.getByText("Summarize this: {{ input_data }}")).toBeInTheDocument(); + }); + + // ---- Collapsible response with copy ---- + + it("shows collapsible response section with copy button", async () => { + const user = userEvent.setup(); + const writeText = vi.fn().mockResolvedValue(undefined); + Object.defineProperty(navigator, "clipboard", { + value: { writeText }, + writable: true, + configurable: true, + }); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + const toggle = screen.getByTestId("response-stage-0-toggle"); + await user.click(toggle); + expect(screen.getByTestId("response-stage-0-content")).toBeInTheDocument(); + expect(screen.getByText("This is a summary of the input.")).toBeInTheDocument(); + + // Copy button + const copyBtn = screen.getByTestId("response-stage-0-copy"); + expect(copyBtn).toHaveTextContent("Copy"); + await user.click(copyBtn); + expect(writeText).toHaveBeenCalledWith("This is a summary of the input."); + }); + + it("shows Copied! feedback after copy", async () => { + const user = userEvent.setup(); + Object.defineProperty(navigator, "clipboard", { + value: { writeText: vi.fn().mockResolvedValue(undefined) }, + writable: true, + configurable: true, + }); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + await user.click(screen.getByTestId("response-stage-0-toggle")); + await user.click(screen.getByTestId("response-stage-0-copy")); + expect(screen.getByTestId("response-stage-0-copy")).toHaveTextContent("Copied!"); + }); + + // ---- Metadata footer ---- + + it("shows run_id and token counts in footer", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByText("run-abc-123")).toBeInTheDocument(); + expect(screen.getByText("120 in / 45 out tokens")).toBeInTheDocument(); + }); + + it("does not show token counts when null", async () => { + const user = userEvent.setup(); + renderCard({ run: makeRun({ tokens_in: null, tokens_out: null }) }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.queryByText(/tokens/)).not.toBeInTheDocument(); + }); + + // ---- Multiple stages ---- + + it("renders prompt and response sections for multiple stages", async () => { + const user = userEvent.setup(); + renderCard({ + run: makeRun({ + stage_results: [ + makeStage({ stage_index: 0, prompt_sent: "Prompt A", response_raw: "Response A" }), + makeStage({ stage_index: 1, prompt_sent: "Prompt B", response_raw: "Response B" }), + ], + }), + }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByTestId("prompt-stage-0-toggle")).toBeInTheDocument(); + expect(screen.getByTestId("prompt-stage-1-toggle")).toBeInTheDocument(); + expect(screen.getByTestId("response-stage-0-toggle")).toBeInTheDocument(); + expect(screen.getByTestId("response-stage-1-toggle")).toBeInTheDocument(); + }); + + // ---- Score bar clamping ---- + + it("clamps score bar width between 0 and 100%", async () => { + const user = userEvent.setup(); + renderCard({ + run: makeRun({ + scores: [ + { scorer_name: "over", value: 1.5 }, + { scorer_name: "under", value: -0.3 }, + ], + }), + }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByTestId("score-bar-over").style.width).toBe("100%"); + expect(screen.getByTestId("score-bar-under").style.width).toBe("0%"); + }); + + // ---- Stage with null latency/tokens ---- + + it("handles stage with null latency and tokens", async () => { + const user = userEvent.setup(); + renderCard({ + run: makeRun({ + stage_results: [ + makeStage({ stage_index: 0, latency_ms: null, tokens_in: null, tokens_out: null }), + ], + }), + }); + await user.click(screen.getByTestId("run-card-header")); + expect(screen.getByText("Stage 1")).toBeInTheDocument(); + // No latency or token info shown + const detail = screen.getByTestId("run-card-detail"); + expect(within(detail).queryByText(/\d+\.\d+s/)).not.toBeInTheDocument(); + }); + + // ---- Prompt collapsible does not have copy button ---- + + it("prompt section does not have a copy button", async () => { + const user = userEvent.setup(); + renderCard(); + await user.click(screen.getByTestId("run-card-header")); + await user.click(screen.getByTestId("prompt-stage-0-toggle")); + expect(screen.queryByTestId("prompt-stage-0-copy")).not.toBeInTheDocument(); + }); +}); diff --git a/frontend/src/components/RunCard.tsx b/frontend/src/components/RunCard.tsx new file mode 100644 index 0000000..c8d41db --- /dev/null +++ b/frontend/src/components/RunCard.tsx @@ -0,0 +1,285 @@ +import { useState } from "react"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface RunCardStageResult { + stage_index: number; + prompt_sent: string; + response_raw: string; + model_used: string; + parameters: Record | null; + tokens_in: number | null; + tokens_out: number | null; + latency_ms: number | null; +} + +export interface RunCardScore { + scorer_name: string; + value: number; +} + +export interface RunCardData { + run_id: string; + config_summary: string; + config: Record; + status: string; + cached: boolean; + duration_ms: number | null; + tokens_in: number | null; + tokens_out: number | null; + scores: RunCardScore[]; + stage_results: RunCardStageResult[]; +} + +export interface RunCardProps { + run: RunCardData; + defaultExpanded?: boolean; +} + +// --------------------------------------------------------------------------- +// Sub-components +// --------------------------------------------------------------------------- + +function CacheStatusBadge({ cached, status }: { cached: boolean; status: string }) { + if (cached) + return ( + + cached + + ); + if (status === "completed") + return ( + + completed + + ); + if (status === "running") + return ( + + running + + ); + if (status === "failed") + return ( + + failed + + ); + return ( + + {status} + + ); +} + +function ScoreBar({ name, value }: { name: string; value: number }) { + const pct = Math.max(0, Math.min(100, value * 100)); + return ( +
+ {name} +
+ + {value.toFixed(3)} + +
+
+
+
+
+ ); +} + +function CollapsibleSection({ + title, + testId, + children, + copyText, +}: { + title: string; + testId: string; + children: React.ReactNode; + copyText?: string; +}) { + const [open, setOpen] = useState(false); + const [copied, setCopied] = useState(false); + + function handleCopy() { + if (!copyText) return; + navigator.clipboard.writeText(copyText).then(() => { + setCopied(true); + setTimeout(() => setCopied(false), 1500); + }); + } + + return ( +
+ + {open && ( +
+ {copyText && ( +
+ +
+ )} + {children} +
+ )} +
+ ); +} + +// --------------------------------------------------------------------------- +// RunCard Component +// --------------------------------------------------------------------------- + +export default function RunCard({ run, defaultExpanded = false }: RunCardProps) { + const [expanded, setExpanded] = useState(defaultExpanded); + + return ( +
+ {/* Header — always visible */} + + + {/* Expanded detail */} + {expanded && ( +
+ {/* Scores */} + {run.scores.length > 0 && ( +
+

+ Scores +

+
+ {run.scores.map((s) => ( + + ))} +
+
+ )} + + {/* Config */} +
+

+ Config +

+
+              {JSON.stringify(run.config, null, 2)}
+            
+
+ + {/* Timing breakdown per stage */} + {run.stage_results.length > 0 && ( +
+

+ Stage Timing +

+
+ {run.stage_results.map((stage) => ( +
+ + Stage {stage.stage_index + 1} + + ({stage.model_used}) + + +
+ {stage.latency_ms != null && ( + {(stage.latency_ms / 1000).toFixed(2)}s + )} + {(stage.tokens_in != null || stage.tokens_out != null) && ( + + {stage.tokens_in ?? 0}↑ {stage.tokens_out ?? 0}↓ + + )} +
+
+ ))} +
+
+ )} + + {/* Prompt sent (collapsible per stage) */} + {run.stage_results.map((stage) => ( + +
+                {stage.prompt_sent}
+              
+
+ ))} + + {/* Raw response (collapsible per stage with copy) */} + {run.stage_results.map((stage) => ( + +
+                {stage.response_raw}
+              
+
+ ))} + + {/* Metadata footer */} +
+ {run.run_id} + {(run.tokens_in != null || run.tokens_out != null) && ( + + {run.tokens_in ?? 0} in / {run.tokens_out ?? 0} out tokens + + )} +
+
+ )} +
+ ); +}