From cf49e9c888a514cda7db63e70e6f8b6b89e984fd Mon Sep 17 00:00:00 2001 From: John Lightner Date: Tue, 7 Apr 2026 03:10:08 -0500 Subject: [PATCH] MAESTRO: Extract Leaderboard into standalone component with expand, sort, and animation Extract the inline LeaderboardTable from LivePage into a standalone Leaderboard component with click-to-expand detail rows, sortable columns, smooth slide-in animation for new entries, and a subtle glow effect on the best run. 29 tests added. --- Auto Run Docs/02b-frontend-dashboard.md | 6 +- frontend/src/components/Leaderboard.test.tsx | 374 +++++++++++++++++++ frontend/src/components/Leaderboard.tsx | 357 ++++++++++++++++++ frontend/src/pages/LivePage.tsx | 171 +-------- 4 files changed, 738 insertions(+), 170 deletions(-) create mode 100644 frontend/src/components/Leaderboard.test.tsx create mode 100644 frontend/src/components/Leaderboard.tsx diff --git a/Auto Run Docs/02b-frontend-dashboard.md b/Auto Run Docs/02b-frontend-dashboard.md index 5a3a34f..e1f861b 100644 --- a/Auto Run Docs/02b-frontend-dashboard.md +++ b/Auto Run Docs/02b-frontend-dashboard.md @@ -23,9 +23,11 @@ Build the React frontend: setup wizard, experiment builder, real-time observabil - [x] Build the model selector component (frontend/src/components/ModelSelector.tsx). Dropdown grouped by endpoint. Each option shows model name + endpoint label. Include a "refresh models" button that calls the endpoint test API to refresh available models. Show a connectivity indicator (green dot = reachable, red = error). -- [ ] Implement the Live Observability page (frontend/src/pages/Live.tsx). This is the star of the show — the real-time dashboard during active sweeps. Layout: left column (60%) shows the activity timeline and current run details, right column (40%) shows the leaderboard and steering controls. Connect via WebSocket to /ws/experiments/{id}. Everything updates in real-time without page refresh. +- [x] Implement the Live Observability page (frontend/src/pages/Live.tsx). This is the star of the show — the real-time dashboard during active sweeps. Layout: left column (60%) shows the activity timeline and current run details, right column (40%) shows the leaderboard and steering controls. Connect via WebSocket to /ws/experiments/{id}. Everything updates in real-time without page refresh. + -- [ ] Build the Leaderboard component (frontend/src/components/Leaderboard.tsx). Real-time ranked table of runs. Columns: rank, config summary (model + key params), individual scores, weighted total, status (completed/cached/running). Click a row to expand full details. Sortable by any column. New entries animate in smoothly. Highlight the current best with a subtle glow effect. +- [x] Build the Leaderboard component (frontend/src/components/Leaderboard.tsx). Real-time ranked table of runs. Columns: rank, config summary (model + key params), individual scores, weighted total, status (completed/cached/running). Click a row to expand full details. Sortable by any column. New entries animate in smoothly. Highlight the current best with a subtle glow effect. + - [ ] Build the Activity Timeline component (frontend/src/components/Timeline.tsx). Chronological feed of events received via WebSocket. Each event is a card: run.started (blue), run.completed (green), new_best_found (gold), cache_hit (gray), run.failed (red). Include timestamps and key metrics. Auto-scroll to latest, with a "pause scroll" button. Filterable by event type. diff --git a/frontend/src/components/Leaderboard.test.tsx b/frontend/src/components/Leaderboard.test.tsx new file mode 100644 index 0000000..4efbc62 --- /dev/null +++ b/frontend/src/components/Leaderboard.test.tsx @@ -0,0 +1,374 @@ +import { render, screen, within, act } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { describe, it, expect, vi, beforeEach } from "vitest"; +import Leaderboard from "./Leaderboard"; +import type { LeaderboardRow, LeaderboardProps } from "./Leaderboard"; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +function makeRow(overrides: Partial = {}): LeaderboardRow { + return { + run_id: "r1", + config_summary: "gpt-4 t=0.7", + scores: { length: 0.85, quality: 0.9 }, + weighted_total: 0.875, + status: "completed", + cached: false, + ...overrides, + }; +} + +const ROW_A = makeRow({ + run_id: "r1", + config_summary: "gpt-4 t=0.7", + scores: { length: 0.85, quality: 0.9 }, + weighted_total: 0.875, +}); + +const ROW_B = makeRow({ + run_id: "r2", + config_summary: "gpt-3.5 t=1.0", + scores: { length: 0.6, quality: 0.7 }, + weighted_total: 0.65, +}); + +const ROW_C = makeRow({ + run_id: "r3", + config_summary: "llama-3 t=0.5", + scores: { length: 0.95, quality: 0.8 }, + weighted_total: 0.88, + cached: true, +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function renderLeaderboard(overrides: Partial = {}) { + const props: LeaderboardProps = { + rows: [ROW_A, ROW_B], + bestRunId: null, + ...overrides, + }; + return { ...render(), props }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("Leaderboard", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + // ------------------------------------------------------------------------- + // Basic rendering + // ------------------------------------------------------------------------- + + it("renders the leaderboard table", () => { + renderLeaderboard(); + expect(screen.getByTestId("leaderboard-table")).toBeInTheDocument(); + }); + + it("renders rows for each entry", () => { + renderLeaderboard(); + const rows = screen.getAllByTestId("leaderboard-row"); + expect(rows).toHaveLength(2); + }); + + it("shows rank numbers", () => { + renderLeaderboard(); + const rows = screen.getAllByTestId("leaderboard-row"); + // Default sort is by weighted_total descending, so ROW_A (0.875) is #1 + expect(rows[0]).toHaveTextContent("1"); + expect(rows[1]).toHaveTextContent("2"); + }); + + it("shows config summary", () => { + renderLeaderboard(); + expect(screen.getByText("gpt-4 t=0.7")).toBeInTheDocument(); + expect(screen.getByText("gpt-3.5 t=1.0")).toBeInTheDocument(); + }); + + it("shows individual score columns", () => { + renderLeaderboard(); + const rows = screen.getAllByTestId("leaderboard-row"); + // ROW_A scores: length=0.85, quality=0.9 + expect(rows[0]).toHaveTextContent("0.850"); + expect(rows[0]).toHaveTextContent("0.900"); + }); + + it("shows weighted total", () => { + renderLeaderboard(); + const rows = screen.getAllByTestId("leaderboard-row"); + expect(rows[0]).toHaveTextContent("0.875"); + }); + + it("shows status badges", () => { + renderLeaderboard({ rows: [ROW_A] }); + expect(screen.getByText("completed")).toBeInTheDocument(); + }); + + it("shows cached badge", () => { + renderLeaderboard({ rows: [ROW_C] }); + expect(screen.getByText("cached")).toBeInTheDocument(); + }); + + it("shows running status badge", () => { + renderLeaderboard({ + rows: [makeRow({ run_id: "r-run", status: "running" })], + }); + expect(screen.getByText("running")).toBeInTheDocument(); + }); + + it("shows failed status badge", () => { + renderLeaderboard({ + rows: [makeRow({ run_id: "r-fail", status: "failed" })], + }); + expect(screen.getByText("failed")).toBeInTheDocument(); + }); + + it("shows fallback status badge for unknown status", () => { + renderLeaderboard({ + rows: [makeRow({ run_id: "r-q", status: "queued" })], + }); + expect(screen.getByText("queued")).toBeInTheDocument(); + }); + + it("shows 'No runs yet' when empty", () => { + renderLeaderboard({ rows: [] }); + expect(screen.getByText("No runs yet")).toBeInTheDocument(); + }); + + // ------------------------------------------------------------------------- + // Sorting + // ------------------------------------------------------------------------- + + it("sorts by weighted total descending by default", () => { + renderLeaderboard(); + const rows = screen.getAllByTestId("leaderboard-row"); + // ROW_A (0.875) should be first, ROW_B (0.65) second + expect(rows[0]).toHaveTextContent("gpt-4 t=0.7"); + expect(rows[1]).toHaveTextContent("gpt-3.5 t=1.0"); + }); + + it("toggles sort direction on column click", async () => { + renderLeaderboard(); + const user = userEvent.setup(); + + // Click "Total" to toggle to ascending + await user.click(screen.getByTestId("sort-total")); + + const rows = screen.getAllByTestId("leaderboard-row"); + // Now ascending: ROW_B (0.65) first, ROW_A (0.875) second + expect(rows[0]).toHaveTextContent("gpt-3.5 t=1.0"); + expect(rows[1]).toHaveTextContent("gpt-4 t=0.7"); + }); + + it("sorts by individual score column", async () => { + renderLeaderboard({ rows: [ROW_A, ROW_B, ROW_C] }); + const user = userEvent.setup(); + + // Click "length" column header + await user.click(screen.getByTestId("sort-length")); + + const rows = screen.getAllByTestId("leaderboard-row"); + // Descending by length: ROW_C (0.95), ROW_A (0.85), ROW_B (0.6) + expect(rows[0]).toHaveTextContent("llama-3 t=0.5"); + expect(rows[1]).toHaveTextContent("gpt-4 t=0.7"); + expect(rows[2]).toHaveTextContent("gpt-3.5 t=1.0"); + }); + + it("shows sort indicator arrow", () => { + renderLeaderboard(); + // Default sort is weighted_total descending + expect(screen.getByTestId("sort-total")).toHaveTextContent("\u25BC"); + }); + + // ------------------------------------------------------------------------- + // Best run highlighting + // ------------------------------------------------------------------------- + + it("highlights the best run with glow effect", () => { + renderLeaderboard({ bestRunId: "r1" }); + const rows = screen.getAllByTestId("leaderboard-row"); + expect(rows[0].className).toContain("bg-amber"); + expect(rows[0].className).toContain("shadow-"); + }); + + it("does not highlight non-best rows", () => { + renderLeaderboard({ bestRunId: "r1" }); + const rows = screen.getAllByTestId("leaderboard-row"); + expect(rows[1].className).not.toContain("bg-amber"); + }); + + // ------------------------------------------------------------------------- + // Click to expand details + // ------------------------------------------------------------------------- + + it("expands row details on click", async () => { + renderLeaderboard(); + const user = userEvent.setup(); + + expect(screen.queryByTestId("leaderboard-detail")).not.toBeInTheDocument(); + + const rows = screen.getAllByTestId("leaderboard-row"); + await user.click(rows[0]); + + expect(screen.getByTestId("leaderboard-detail")).toBeInTheDocument(); + }); + + it("shows run ID in expanded details", async () => { + renderLeaderboard(); + const user = userEvent.setup(); + + await user.click(screen.getAllByTestId("leaderboard-row")[0]); + + const detail = screen.getByTestId("leaderboard-detail"); + expect(detail).toHaveTextContent("r1"); + }); + + it("shows scores breakdown in expanded details", async () => { + renderLeaderboard(); + const user = userEvent.setup(); + + await user.click(screen.getAllByTestId("leaderboard-row")[0]); + + const detail = screen.getByTestId("leaderboard-detail"); + expect(detail).toHaveTextContent("length"); + expect(detail).toHaveTextContent("quality"); + expect(detail).toHaveTextContent("Weighted Total"); + }); + + it("shows duration when available", async () => { + renderLeaderboard({ + rows: [makeRow({ run_id: "r-dur", duration_ms: 2500 })], + }); + const user = userEvent.setup(); + + await user.click(screen.getAllByTestId("leaderboard-row")[0]); + + const detail = screen.getByTestId("leaderboard-detail"); + expect(detail).toHaveTextContent("2.50s"); + }); + + it("shows token counts when available", async () => { + renderLeaderboard({ + rows: [makeRow({ run_id: "r-tok", tokens_in: 100, tokens_out: 200 })], + }); + const user = userEvent.setup(); + + await user.click(screen.getAllByTestId("leaderboard-row")[0]); + + const detail = screen.getByTestId("leaderboard-detail"); + expect(detail).toHaveTextContent("100 in / 200 out"); + }); + + it("shows config JSON when available", async () => { + renderLeaderboard({ + rows: [ + makeRow({ + run_id: "r-cfg", + config: { model: "gpt-4", temperature: 0.7 }, + }), + ], + }); + const user = userEvent.setup(); + + await user.click(screen.getAllByTestId("leaderboard-row")[0]); + + const detail = screen.getByTestId("leaderboard-detail"); + expect(detail).toHaveTextContent('"model": "gpt-4"'); + }); + + it("collapses expanded row on second click", async () => { + renderLeaderboard(); + const user = userEvent.setup(); + + const rows = screen.getAllByTestId("leaderboard-row"); + await user.click(rows[0]); + expect(screen.getByTestId("leaderboard-detail")).toBeInTheDocument(); + + await user.click(rows[0]); + expect(screen.queryByTestId("leaderboard-detail")).not.toBeInTheDocument(); + }); + + it("only expands one row at a time", async () => { + renderLeaderboard(); + const user = userEvent.setup(); + + const rows = screen.getAllByTestId("leaderboard-row"); + await user.click(rows[0]); + expect(screen.getAllByTestId("leaderboard-detail")).toHaveLength(1); + + // Click a different row + await user.click(rows[1]); + expect(screen.getAllByTestId("leaderboard-detail")).toHaveLength(1); + // Detail should now show the second row's data + expect(screen.getByTestId("leaderboard-detail")).toHaveTextContent("r2"); + }); + + // ------------------------------------------------------------------------- + // onRowClick callback + // ------------------------------------------------------------------------- + + it("calls onRowClick with run_id when row is clicked", async () => { + const onRowClick = vi.fn(); + renderLeaderboard({ onRowClick }); + const user = userEvent.setup(); + + await user.click(screen.getAllByTestId("leaderboard-row")[0]); + + expect(onRowClick).toHaveBeenCalledWith("r1"); + }); + + // ------------------------------------------------------------------------- + // New entry animation + // ------------------------------------------------------------------------- + + it("applies animation class to new entries", () => { + const { rerender } = render( + , + ); + + // Add a new row + rerender( + , + ); + + const rows = screen.getAllByTestId("leaderboard-row"); + // The new row (ROW_B) should have the animation class + const hasAnimation = rows.some((r) => + r.className.includes("animate-slide-in"), + ); + expect(hasAnimation).toBe(true); + }); + + it("removes animation class after timeout", async () => { + vi.useFakeTimers(); + + const { rerender } = render( + , + ); + + rerender( + , + ); + + // Advance past animation cleanup timeout + act(() => { + vi.advanceTimersByTime(700); + }); + + const rows = screen.getAllByTestId("leaderboard-row"); + const hasAnimation = rows.some((r) => + r.className.includes("animate-slide-in"), + ); + expect(hasAnimation).toBe(false); + + vi.useRealTimers(); + }); +}); diff --git a/frontend/src/components/Leaderboard.tsx b/frontend/src/components/Leaderboard.tsx new file mode 100644 index 0000000..b900c2e --- /dev/null +++ b/frontend/src/components/Leaderboard.tsx @@ -0,0 +1,357 @@ +import { Fragment, useState, useRef, useEffect } from "react"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface LeaderboardRow { + run_id: string; + config_summary: string; + scores: Record; + weighted_total: number; + status: string; + cached: boolean; + /** Optional full config for expanded detail view */ + config?: Record; + /** Optional timing information */ + duration_ms?: number | null; + /** Optional token counts */ + tokens_in?: number | null; + tokens_out?: number | null; +} + +export interface LeaderboardProps { + rows: LeaderboardRow[]; + bestRunId: string | null; + onRowClick?: (runId: string) => void; +} + +// --------------------------------------------------------------------------- +// Status Badge +// --------------------------------------------------------------------------- + +function StatusBadge({ status, cached }: { status: string; cached: boolean }) { + if (cached) + return ( + + cached + + ); + if (status === "completed") + return ( + + completed + + ); + if (status === "running") + return ( + + running + + ); + if (status === "failed") + return ( + + failed + + ); + return ( + + {status} + + ); +} + +// --------------------------------------------------------------------------- +// Score Bar (visual indicator for individual scores) +// --------------------------------------------------------------------------- + +function ScoreBar({ value }: { value: number }) { + const pct = Math.max(0, Math.min(100, value * 100)); + return ( +
+ + {value.toFixed(3)} + +
+
+
+
+ ); +} + +// --------------------------------------------------------------------------- +// Expanded Row Detail +// --------------------------------------------------------------------------- + +function RowDetail({ row }: { row: LeaderboardRow }) { + return ( + + +
+ {/* Scores breakdown */} +
+

+ Scores +

+
+ {Object.entries(row.scores).map(([name, value]) => ( +
+ + {name} + + +
+ ))} +
+ + Weighted Total + + + {row.weighted_total.toFixed(3)} + +
+
+
+ + {/* Config & metadata */} +
+

+ Details +

+
+
+
Run ID
+
+ {row.run_id} +
+
+
+
Status
+
+ +
+
+ {row.duration_ms != null && ( +
+
Duration
+
+ {(row.duration_ms / 1000).toFixed(2)}s +
+
+ )} + {(row.tokens_in != null || row.tokens_out != null) && ( +
+
Tokens
+
+ {row.tokens_in ?? 0} in / {row.tokens_out ?? 0} out +
+
+ )} + {row.config && ( +
+
Config
+
+
+                      {JSON.stringify(row.config, null, 2)}
+                    
+
+
+ )} +
+
+
+ + + ); +} + +// --------------------------------------------------------------------------- +// Leaderboard Component +// --------------------------------------------------------------------------- + +export default function Leaderboard({ rows, bestRunId, onRowClick }: LeaderboardProps) { + const [sortKey, setSortKey] = useState("weighted_total"); + const [sortAsc, setSortAsc] = useState(false); + const [expandedRunId, setExpandedRunId] = useState(null); + const [newRunIds, setNewRunIds] = useState>(new Set()); + const prevRowCountRef = useRef(rows.length); + + // Track newly added rows for entry animation + useEffect(() => { + if (rows.length > prevRowCountRef.current) { + const existingIds = new Set( + rows.slice(0, prevRowCountRef.current).map((r) => r.run_id), + ); + const added = rows + .filter((r) => !existingIds.has(r.run_id)) + .map((r) => r.run_id); + + if (added.length > 0) { + setNewRunIds((prev) => { + const next = new Set(prev); + added.forEach((id) => next.add(id)); + return next; + }); + + // Remove animation class after transition completes + const timer = setTimeout(() => { + setNewRunIds((prev) => { + const next = new Set(prev); + added.forEach((id) => next.delete(id)); + return next; + }); + }, 600); + return () => clearTimeout(timer); + } + } + prevRowCountRef.current = rows.length; + }, [rows]); + + function handleSort(key: string) { + if (key === sortKey) { + setSortAsc(!sortAsc); + } else { + setSortKey(key); + setSortAsc(false); + } + } + + function handleRowClick(runId: string) { + setExpandedRunId((prev) => (prev === runId ? null : runId)); + onRowClick?.(runId); + } + + const sortedRows = [...rows].sort((a, b) => { + let aVal: number; + let bVal: number; + if (sortKey === "weighted_total") { + aVal = a.weighted_total; + bVal = b.weighted_total; + } else { + aVal = a.scores[sortKey] ?? 0; + bVal = b.scores[sortKey] ?? 0; + } + return sortAsc ? aVal - bVal : bVal - aVal; + }); + + // Collect all score keys across all rows + const scoreKeys = Array.from( + new Set(rows.flatMap((r) => Object.keys(r.scores))), + ); + + return ( +
+ + + + + + {scoreKeys.map((k) => ( + + ))} + + + + + + {sortedRows.map((row, idx) => { + const isExpanded = expandedRunId === row.run_id; + const isBest = row.run_id === bestRunId; + const isNew = newRunIds.has(row.run_id); + + return ( + + handleRowClick(row.run_id)} + className={[ + "border-t border-slate-100 dark:border-slate-700 transition-all duration-300 cursor-pointer", + isBest + ? "bg-amber-50 dark:bg-amber-900/20 ring-1 ring-amber-300 dark:ring-amber-700 shadow-[0_0_8px_rgba(251,191,36,0.3)] dark:shadow-[0_0_8px_rgba(251,191,36,0.15)]" + : "hover:bg-slate-50 dark:hover:bg-slate-800/50", + isNew ? "animate-slide-in" : "", + ].join(" ")} + > + + + {scoreKeys.map((k) => ( + + ))} + + + + {isExpanded && } + + ); + })} + {sortedRows.length === 0 && ( + + + + )} + +
#Config handleSort(k)} + data-testid={`sort-${k}`} + > + {k} + {sortKey === k ? (sortAsc ? " \u25B2" : " \u25BC") : ""} + handleSort("weighted_total")} + data-testid="sort-total" + > + Total + {sortKey === "weighted_total" + ? sortAsc + ? " \u25B2" + : " \u25BC" + : ""} + Status
+ {idx + 1} + + {row.config_summary} + + {row.scores[k] != null ? row.scores[k].toFixed(3) : "\u2014"} + + {row.weighted_total.toFixed(3)} + + +
+ No runs yet +
+ + {/* Animation keyframes injected via style tag */} + +
+ ); +} diff --git a/frontend/src/pages/LivePage.tsx b/frontend/src/pages/LivePage.tsx index a2da883..9a8aba5 100644 --- a/frontend/src/pages/LivePage.tsx +++ b/frontend/src/pages/LivePage.tsx @@ -11,6 +11,8 @@ import type { RunResponse, WsConnection, } from "../api/client"; +import Leaderboard from "../components/Leaderboard"; +import type { LeaderboardRow } from "../components/Leaderboard"; // --------------------------------------------------------------------------- // Types @@ -54,15 +56,6 @@ export interface TimelineEntry { timestamp: Date; } -export interface LeaderboardRow { - run_id: string; - config_summary: string; - scores: Record; - weighted_total: number; - status: string; - cached: boolean; -} - type ConnectionStatus = "connecting" | "connected" | "disconnected"; // --------------------------------------------------------------------------- @@ -137,164 +130,6 @@ function TimelineCard({ entry }: { entry: TimelineEntry }) { ); } -// --------------------------------------------------------------------------- -// Leaderboard Table -// --------------------------------------------------------------------------- - -function LeaderboardTable({ - rows, - bestRunId, -}: { - rows: LeaderboardRow[]; - bestRunId: string | null; -}) { - const [sortKey, setSortKey] = useState("weighted_total"); - const [sortAsc, setSortAsc] = useState(false); - - function handleSort(key: string) { - if (key === sortKey) { - setSortAsc(!sortAsc); - } else { - setSortKey(key); - setSortAsc(false); - } - } - - const sortedRows = [...rows].sort((a, b) => { - let aVal: number; - let bVal: number; - if (sortKey === "weighted_total") { - aVal = a.weighted_total; - bVal = b.weighted_total; - } else { - aVal = a.scores[sortKey] ?? 0; - bVal = b.scores[sortKey] ?? 0; - } - return sortAsc ? aVal - bVal : bVal - aVal; - }); - - // Collect all score keys - const scoreKeys = Array.from( - new Set(rows.flatMap((r) => Object.keys(r.scores))), - ); - - return ( -
- - - - - - {scoreKeys.map((k) => ( - - ))} - - - - - - {sortedRows.map((row, idx) => ( - - - - {scoreKeys.map((k) => ( - - ))} - - - - ))} - {sortedRows.length === 0 && ( - - - - )} - -
#Config handleSort(k)} - > - {k} - {sortKey === k ? (sortAsc ? " \u25B2" : " \u25BC") : ""} - handleSort("weighted_total")} - > - Total - {sortKey === "weighted_total" - ? sortAsc - ? " \u25B2" - : " \u25BC" - : ""} - Status
- {idx + 1} - - {row.config_summary} - - {row.scores[k] != null ? row.scores[k].toFixed(3) : "—"} - - {row.weighted_total.toFixed(3)} - - -
- No runs yet -
-
- ); -} - -function StatusBadge({ - status, - cached, -}: { - status: string; - cached: boolean; -}) { - if (cached) - return ( - - cached - - ); - if (status === "completed") - return ( - - completed - - ); - if (status === "running") - return ( - - running - - ); - if (status === "failed") - return ( - - failed - - ); - return ( - - {status} - - ); -} // --------------------------------------------------------------------------- // Steering Controls @@ -935,7 +770,7 @@ export default function LivePage() {
- +