From 16c56b13f23754aa556a3f9bcd4e74a6e29b33ef Mon Sep 17 00:00:00 2001 From: John Lightner Date: Tue, 7 Apr 2026 03:06:16 -0500 Subject: [PATCH] MAESTRO: Implement Live Observability page with real-time WebSocket dashboard Full LivePage implementation with 60/40 split layout: - Left column: Activity Timeline with color-coded event cards (run.started, run.completed, new_best_found, cache_hit, run.failed), event type filtering, and auto-scroll toggle - Right column: Leaderboard table with sortable columns, best-run highlighting, and status badges; Steering Controls with pause/resume/stop (with confirmation dialogs), progress bar, token counter, cost estimate, and cache hit rate - WebSocket integration with exponential backoff reconnect, connection status indicator, and experiment subscription - 35 tests covering loading/error states, WebSocket events, timeline filtering, leaderboard updates, progress tracking, and steering control interactions --- frontend/src/App.test.tsx | 2 +- frontend/src/pages/LivePage.test.tsx | 743 +++++++++++++++++++++ frontend/src/pages/LivePage.tsx | 946 ++++++++++++++++++++++++++- 3 files changed, 1686 insertions(+), 5 deletions(-) create mode 100644 frontend/src/pages/LivePage.test.tsx diff --git a/frontend/src/App.test.tsx b/frontend/src/App.test.tsx index c2cd0f7..a514543 100644 --- a/frontend/src/App.test.tsx +++ b/frontend/src/App.test.tsx @@ -78,7 +78,7 @@ describe("App routing", () => { it("renders LivePage at /live/:id", async () => { renderWithRouter("/live/abc-123"); await waitFor(() => { - expect(screen.getByText("Live")).toBeInTheDocument(); + expect(screen.getByText("Loading experiment…")).toBeInTheDocument(); }); }); diff --git a/frontend/src/pages/LivePage.test.tsx b/frontend/src/pages/LivePage.test.tsx new file mode 100644 index 0000000..df517dd --- /dev/null +++ b/frontend/src/pages/LivePage.test.tsx @@ -0,0 +1,743 @@ +import { render, screen, waitFor, within, act } from "@testing-library/react"; +import userEvent from 
"@testing-library/user-event"; +import { MemoryRouter } from "react-router-dom"; +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import LivePage from "./LivePage"; +import * as client from "../api/client"; +import type { WsConnection, WsMessageHandler } from "../api/client"; + +// --------------------------------------------------------------------------- +// Mocks +// --------------------------------------------------------------------------- + +const mockNavigate = vi.fn(); +vi.mock("react-router-dom", async () => { + const actual = await vi.importActual("react-router-dom"); + return { + ...actual, + useNavigate: () => mockNavigate, + useParams: () => ({ id: "exp-1" }), + }; +}); + +const MOCK_EXPERIMENT: client.ExperimentResponse = { + id: "exp-1", + project_id: "proj-1", + name: "Test Experiment", + description: "Testing the live page", + sample_data: null, + pipeline_stages: null, + scoring_config: null, + parameter_space: null, + status: "running", + created_at: "2026-04-07T10:00:00Z", + updated_at: "2026-04-07T10:00:00Z", +}; + +// Capture the WebSocket message handler so we can simulate events +let capturedOnMessage: WsMessageHandler | null = null; +let capturedOnClose: (() => void) | null = null; +const mockWsSend = vi.fn(); +const mockWsClose = vi.fn(); + +function renderLive() { + return render( + + + , + ); +} + +describe("LivePage", () => { + beforeEach(() => { + vi.restoreAllMocks(); + mockNavigate.mockReset(); + mockWsSend.mockReset(); + mockWsClose.mockReset(); + capturedOnMessage = null; + capturedOnClose = null; + + // Default mocks + vi.spyOn(client.experiments, "get").mockResolvedValue(MOCK_EXPERIMENT); + vi.spyOn(client.runs, "leaderboard").mockResolvedValue({ + items: [], + total: 0, + }); + + vi.spyOn(client, "connectWebSocket").mockImplementation( + (onMessage: WsMessageHandler, onClose?: () => void): WsConnection => { + capturedOnMessage = onMessage; + capturedOnClose = onClose ?? 
null; + return { + send: mockWsSend, + close: mockWsClose, + }; + }, + ); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + // ------------------------------------------------------------------------- + // Loading & error states + // ------------------------------------------------------------------------- + + it("shows loading state initially", () => { + vi.spyOn(client.experiments, "get").mockImplementation( + () => new Promise(() => {}), + ); + renderLive(); + expect(screen.getByText("Loading experiment…")).toBeInTheDocument(); + }); + + it("shows error state on API failure", async () => { + vi.spyOn(client.experiments, "get").mockRejectedValue( + new client.ApiError(404, "Not Found", { detail: "Not found" }), + ); + renderLive(); + + await waitFor(() => { + expect(screen.getByRole("alert")).toHaveTextContent( + "Failed to load experiment (404)", + ); + }); + }); + + it("shows network error on fetch failure", async () => { + vi.spyOn(client.experiments, "get").mockRejectedValue( + new Error("fetch failed"), + ); + renderLive(); + + await waitFor(() => { + expect(screen.getByRole("alert")).toHaveTextContent("Network error"); + }); + }); + + it("has a retry button on error", async () => { + const getSpy = vi + .spyOn(client.experiments, "get") + .mockRejectedValueOnce(new Error("fail")) + .mockResolvedValueOnce(MOCK_EXPERIMENT); + + renderLive(); + + await waitFor(() => { + expect(screen.getByText("Retry")).toBeInTheDocument(); + }); + + const user = userEvent.setup(); + await user.click(screen.getByText("Retry")); + + await waitFor(() => { + expect(screen.getByText("Test Experiment")).toBeInTheDocument(); + }); + expect(getSpy).toHaveBeenCalledTimes(2); + }); + + // ------------------------------------------------------------------------- + // Renders main layout + // ------------------------------------------------------------------------- + + it("renders experiment name and description after loading", async () => { + renderLive(); + + await 
waitFor(() => { + expect(screen.getByText("Test Experiment")).toBeInTheDocument(); + }); + expect(screen.getByText("Testing the live page")).toBeInTheDocument(); + }); + + it("renders activity timeline section", async () => { + renderLive(); + + await waitFor(() => { + expect(screen.getByText("Activity Timeline")).toBeInTheDocument(); + }); + expect(screen.getByText("Waiting for events…")).toBeInTheDocument(); + }); + + it("renders leaderboard section", async () => { + renderLive(); + + await waitFor(() => { + expect(screen.getByText("Leaderboard")).toBeInTheDocument(); + }); + expect(screen.getByTestId("leaderboard-table")).toBeInTheDocument(); + }); + + it("renders steering controls section", async () => { + renderLive(); + + await waitFor(() => { + expect(screen.getByTestId("steering-controls")).toBeInTheDocument(); + }); + }); + + it("renders connection indicator", async () => { + renderLive(); + + await waitFor(() => { + expect(screen.getByTestId("connection-indicator")).toBeInTheDocument(); + }); + }); + + // ------------------------------------------------------------------------- + // WebSocket connection + // ------------------------------------------------------------------------- + + it("connects WebSocket after experiment loads", async () => { + renderLive(); + + await waitFor(() => { + expect(client.connectWebSocket).toHaveBeenCalled(); + }); + }); + + it("sends subscribe message with experiment_id", async () => { + renderLive(); + + await waitFor(() => { + expect(mockWsSend).toHaveBeenCalledWith({ + type: "subscribe", + experiment_id: "exp-1", + }); + }); + }); + + it("closes WebSocket on unmount", async () => { + const { unmount } = renderLive(); + + await waitFor(() => { + expect(client.connectWebSocket).toHaveBeenCalled(); + }); + + unmount(); + expect(mockWsClose).toHaveBeenCalled(); + }); + + // ------------------------------------------------------------------------- + // Timeline events + // 
------------------------------------------------------------------------- + + it("displays run.started event in timeline", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "run.started", + experiment_id: "exp-1", + run_id: "r1", + config: { model: "gpt-4", temperature: 0.7 }, + timestamp: "2026-04-07T10:01:00Z", + }); + }); + + await waitFor(() => { + const entries = screen.getAllByTestId("timeline-entry"); + expect(entries).toHaveLength(1); + expect(entries[0]).toHaveTextContent("Run Started"); + expect(entries[0]).toHaveTextContent("gpt-4"); + }); + }); + + it("displays run.completed event in timeline", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "run.completed", + experiment_id: "exp-1", + run_id: "r1", + config: { model: "gpt-4", temperature: 0.7 }, + scores: { length: 0.8 }, + weighted_total: 0.8, + timestamp: "2026-04-07T10:02:00Z", + }); + }); + + await waitFor(() => { + const entries = screen.getAllByTestId("timeline-entry"); + expect(entries).toHaveLength(1); + expect(entries[0]).toHaveTextContent("Run Completed"); + expect(entries[0]).toHaveTextContent("Score: 0.800"); + }); + }); + + it("displays new_best_found event in timeline", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "new_best_found", + experiment_id: "exp-1", + run_id: "r1", + config: { model: "gpt-4" }, + weighted_total: 0.95, + timestamp: "2026-04-07T10:02:00Z", + }); + }); + + await waitFor(() => { + const entries = screen.getAllByTestId("timeline-entry"); + expect(entries[0]).toHaveAttribute("data-event-type", "new_best_found"); + expect(entries[0]).toHaveTextContent("New Best!"); + }); + }); + + it("displays run.failed event in timeline", async () => { + renderLive(); + + 
await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "run.failed", + experiment_id: "exp-1", + run_id: "r1", + error: "Connection timeout", + timestamp: "2026-04-07T10:03:00Z", + }); + }); + + await waitFor(() => { + const entries = screen.getAllByTestId("timeline-entry"); + expect(entries[0]).toHaveTextContent("Run Failed"); + expect(entries[0]).toHaveTextContent("Connection timeout"); + }); + }); + + it("displays cache_hit event in timeline", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "cache_hit", + experiment_id: "exp-1", + run_id: "r2", + config: { model: "gpt-3.5" }, + timestamp: "2026-04-07T10:02:00Z", + }); + }); + + await waitFor(() => { + const entries = screen.getAllByTestId("timeline-entry"); + expect(entries[0]).toHaveTextContent("Cache Hit"); + }); + }); + + it("ignores ack events", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ type: "ack", data: {} }); + }); + + expect(screen.getByText("Waiting for events…")).toBeInTheDocument(); + }); + + it("ignores events for other experiments", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "run.started", + experiment_id: "other-exp", + run_id: "r1", + config: { model: "gpt-4" }, + }); + }); + + expect(screen.getByText("Waiting for events…")).toBeInTheDocument(); + }); + + // ------------------------------------------------------------------------- + // Timeline filtering + // ------------------------------------------------------------------------- + + it("filters timeline events by type", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: 
"run.started", + experiment_id: "exp-1", + run_id: "r1", + config: { model: "gpt-4" }, + }); + capturedOnMessage!({ + type: "run.failed", + experiment_id: "exp-1", + run_id: "r2", + error: "Timeout", + }); + }); + + await waitFor(() => { + expect(screen.getAllByTestId("timeline-entry")).toHaveLength(2); + }); + + const user = userEvent.setup(); + const filter = screen.getByTestId("event-filter"); + await user.selectOptions(filter, "run.failed"); + + const entries = screen.getAllByTestId("timeline-entry"); + expect(entries).toHaveLength(1); + expect(entries[0]).toHaveTextContent("Run Failed"); + }); + + // ------------------------------------------------------------------------- + // Auto-scroll toggle + // ------------------------------------------------------------------------- + + it("toggles auto-scroll", async () => { + renderLive(); + + await waitFor(() => { + expect( + screen.getByTestId("toggle-autoscroll"), + ).toBeInTheDocument(); + }); + + expect(screen.getByTestId("toggle-autoscroll")).toHaveTextContent( + "Auto-scroll ON", + ); + + const user = userEvent.setup(); + await user.click(screen.getByTestId("toggle-autoscroll")); + + expect(screen.getByTestId("toggle-autoscroll")).toHaveTextContent( + "Auto-scroll OFF", + ); + }); + + // ------------------------------------------------------------------------- + // Leaderboard + // ------------------------------------------------------------------------- + + it("adds completed runs to leaderboard", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "run.completed", + experiment_id: "exp-1", + run_id: "r1", + config: { model: "gpt-4", temperature: 0.7 }, + scores: { length: 0.85 }, + weighted_total: 0.85, + }); + }); + + await waitFor(() => { + const rows = screen.getAllByTestId("leaderboard-row"); + expect(rows).toHaveLength(1); + expect(rows[0]).toHaveTextContent("0.850"); + }); + }); + + it("highlights the 
best run", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "new_best_found", + experiment_id: "exp-1", + run_id: "r1", + config: { model: "gpt-4" }, + scores: { length: 0.95 }, + weighted_total: 0.95, + }); + }); + + await waitFor(() => { + const rows = screen.getAllByTestId("leaderboard-row"); + expect(rows[0].className).toContain("bg-amber"); + }); + }); + + it("shows 'No runs yet' when leaderboard is empty", async () => { + renderLive(); + + await waitFor(() => { + expect(screen.getByText("No runs yet")).toBeInTheDocument(); + }); + }); + + // ------------------------------------------------------------------------- + // Progress updates + // ------------------------------------------------------------------------- + + it("updates progress from sweep.progress events", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "sweep.progress", + experiment_id: "exp-1", + progress: { + completed: 5, + total: 20, + cache_hits: 2, + tokens_total: 15000, + cost_total: 0.0345, + }, + }); + }); + + await waitFor(() => { + expect(screen.getByText("5 / 20 runs")).toBeInTheDocument(); + expect(screen.getByText("25%")).toBeInTheDocument(); + expect(screen.getByText("15,000")).toBeInTheDocument(); + expect(screen.getByText("$0.0345")).toBeInTheDocument(); + }); + }); + + it("updates progress bar width", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "sweep.progress", + experiment_id: "exp-1", + progress: { + completed: 10, + total: 20, + cache_hits: 0, + tokens_total: 0, + cost_total: 0, + }, + }); + }); + + await waitFor(() => { + const bar = screen.getByTestId("progress-bar-fill"); + expect(bar.style.width).toBe("50%"); + }); + }); + + // 
------------------------------------------------------------------------- + // Steering controls + // ------------------------------------------------------------------------- + + it("shows pause and stop buttons when running", async () => { + renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Pause" }), + ).toBeInTheDocument(); + expect( + screen.getByRole("button", { name: "Stop" }), + ).toBeInTheDocument(); + }); + }); + + it("shows confirmation before pausing", async () => { + renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Pause" }), + ).toBeInTheDocument(); + }); + + const user = userEvent.setup(); + await user.click(screen.getByRole("button", { name: "Pause" })); + + expect(screen.getByText("Pause sweep?")).toBeInTheDocument(); + expect( + screen.getByRole("button", { name: "Confirm" }), + ).toBeInTheDocument(); + }); + + it("shows confirmation before stopping", async () => { + renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Stop" }), + ).toBeInTheDocument(); + }); + + const user = userEvent.setup(); + await user.click(screen.getByRole("button", { name: "Stop" })); + + expect(screen.getByText("Stop sweep?")).toBeInTheDocument(); + }); + + it("calls pause API on confirm", async () => { + const pauseSpy = vi + .spyOn(client.experiments, "pause") + .mockResolvedValue(undefined as unknown as void); + + renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Pause" }), + ).toBeInTheDocument(); + }); + + const user = userEvent.setup(); + await user.click(screen.getByRole("button", { name: "Pause" })); + await user.click(screen.getByRole("button", { name: "Confirm" })); + + await waitFor(() => { + expect(pauseSpy).toHaveBeenCalledWith("exp-1"); + }); + }); + + it("shows resume button when paused", async () => { + vi.spyOn(client.experiments, "get").mockResolvedValue({ + ...MOCK_EXPERIMENT, + status: "paused", + }); + + 
renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Resume" }), + ).toBeInTheDocument(); + }); + }); + + it("calls resume API", async () => { + vi.spyOn(client.experiments, "get").mockResolvedValue({ + ...MOCK_EXPERIMENT, + status: "paused", + }); + const resumeSpy = vi + .spyOn(client.experiments, "resume") + .mockResolvedValue(undefined as unknown as void); + + renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Resume" }), + ).toBeInTheDocument(); + }); + + const user = userEvent.setup(); + await user.click(screen.getByRole("button", { name: "Resume" })); + + await waitFor(() => { + expect(resumeSpy).toHaveBeenCalledWith("exp-1"); + }); + }); + + it("calls stop API on confirm", async () => { + const stopSpy = vi + .spyOn(client.experiments, "stop") + .mockResolvedValue(undefined as unknown as void); + + renderLive(); + + await waitFor(() => { + expect( + screen.getByRole("button", { name: "Stop" }), + ).toBeInTheDocument(); + }); + + const user = userEvent.setup(); + await user.click(screen.getByRole("button", { name: "Stop" })); + await user.click(screen.getByRole("button", { name: "Confirm" })); + + await waitFor(() => { + expect(stopSpy).toHaveBeenCalledWith("exp-1"); + }); + }); + + // ------------------------------------------------------------------------- + // sweep.completed sets status + // ------------------------------------------------------------------------- + + it("updates status to completed on sweep.completed", async () => { + renderLive(); + + await waitFor(() => { + expect(capturedOnMessage).not.toBeNull(); + }); + + act(() => { + capturedOnMessage!({ + type: "sweep.completed", + experiment_id: "exp-1", + }); + }); + + await waitFor(() => { + // Pause/Resume/Stop buttons should not be shown for completed status + expect( + screen.queryByRole("button", { name: "Pause" }), + ).not.toBeInTheDocument(); + expect( + screen.queryByRole("button", { name: "Stop" }), + 
).not.toBeInTheDocument(); + }); + }); + + // ------------------------------------------------------------------------- + // Link back to experiment + // ------------------------------------------------------------------------- + + it("has a link back to the experiment page", async () => { + renderLive(); + + await waitFor(() => { + const link = screen.getByText("← Experiment"); + expect(link).toBeInTheDocument(); + expect(link.closest("a")).toHaveAttribute("href", "/experiments/exp-1"); + }); + }); +}); diff --git a/frontend/src/pages/LivePage.tsx b/frontend/src/pages/LivePage.tsx index fedddc7..a2da883 100644 --- a/frontend/src/pages/LivePage.tsx +++ b/frontend/src/pages/LivePage.tsx @@ -1,8 +1,946 @@ -export default function LivePage() { +import { useState, useEffect, useCallback, useRef } from "react"; +import { useParams, useNavigate, Link } from "react-router-dom"; +import { + experiments, + runs as runsApi, + connectWebSocket, + ApiError, +} from "../api/client"; +import type { + ExperimentResponse, + RunResponse, + WsConnection, +} from "../api/client"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type WsEventType = + | "run.started" + | "run.completed" + | "run.failed" + | "new_best_found" + | "cache_hit" + | "sweep.progress" + | "sweep.completed" + | "ack"; + +export interface WsEvent { + type: WsEventType; + experiment_id?: string; + run_id?: string; + config?: Record; + scores?: Record; + weighted_total?: number; + cached?: boolean; + error?: string; + progress?: { + completed: number; + total: number; + cache_hits: number; + tokens_total: number; + cost_total: number; + }; + timestamp?: string; +} + +export interface TimelineEntry { + id: string; + type: WsEventType; + run_id?: string; + message: string; + detail?: string; + timestamp: Date; +} + +export interface LeaderboardRow { + run_id: string; + config_summary: 
string; + scores: Record; + weighted_total: number; + status: string; + cached: boolean; +} + +type ConnectionStatus = "connecting" | "connected" | "disconnected"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +let _timelineIdCounter = 0; +function nextTimelineId(): string { + _timelineIdCounter += 1; + return `tl-${_timelineIdCounter}`; +} + +function configSummary(config?: Record): string { + if (!config) return "—"; + const model = config.model ?? config.model_used ?? ""; + const temp = config.temperature != null ? `t=${config.temperature}` : ""; + const parts = [model, temp].filter(Boolean); + return parts.length > 0 ? parts.join(" ") : JSON.stringify(config).slice(0, 60); +} + +function formatTime(date: Date): string { + return date.toLocaleTimeString([], { + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + }); +} + +const EVENT_COLORS: Record = { + "run.started": "bg-blue-100 dark:bg-blue-900/40 text-blue-700 dark:text-blue-300 border-blue-200 dark:border-blue-800", + "run.completed": "bg-green-100 dark:bg-green-900/40 text-green-700 dark:text-green-300 border-green-200 dark:border-green-800", + "new_best_found": "bg-amber-100 dark:bg-amber-900/40 text-amber-700 dark:text-amber-300 border-amber-200 dark:border-amber-800", + "cache_hit": "bg-slate-100 dark:bg-slate-700/40 text-slate-600 dark:text-slate-400 border-slate-200 dark:border-slate-600", + "run.failed": "bg-red-100 dark:bg-red-900/40 text-red-700 dark:text-red-300 border-red-200 dark:border-red-800", + "sweep.progress": "bg-indigo-100 dark:bg-indigo-900/40 text-indigo-700 dark:text-indigo-300 border-indigo-200 dark:border-indigo-800", + "sweep.completed": "bg-emerald-100 dark:bg-emerald-900/40 text-emerald-700 dark:text-emerald-300 border-emerald-200 dark:border-emerald-800", +}; + +const EVENT_LABELS: Record = { + "run.started": "Run Started", + 
"run.completed": "Run Completed", + "new_best_found": "New Best!", + "cache_hit": "Cache Hit", + "run.failed": "Run Failed", + "sweep.progress": "Progress", + "sweep.completed": "Sweep Done", +}; + +// --------------------------------------------------------------------------- +// Timeline Event Card +// --------------------------------------------------------------------------- + +function TimelineCard({ entry }: { entry: TimelineEntry }) { + const colorClass = EVENT_COLORS[entry.type] ?? EVENT_COLORS["run.started"]; return ( -
-

Live

-

Real-time experiment progress and results.

+
+
+ + {EVENT_LABELS[entry.type] ?? entry.type} + + {formatTime(entry.timestamp)} +
+

{entry.message}

+ {entry.detail && ( +

{entry.detail}

+ )} +
+ ); +} + +// --------------------------------------------------------------------------- +// Leaderboard Table +// --------------------------------------------------------------------------- + +function LeaderboardTable({ + rows, + bestRunId, +}: { + rows: LeaderboardRow[]; + bestRunId: string | null; +}) { + const [sortKey, setSortKey] = useState("weighted_total"); + const [sortAsc, setSortAsc] = useState(false); + + function handleSort(key: string) { + if (key === sortKey) { + setSortAsc(!sortAsc); + } else { + setSortKey(key); + setSortAsc(false); + } + } + + const sortedRows = [...rows].sort((a, b) => { + let aVal: number; + let bVal: number; + if (sortKey === "weighted_total") { + aVal = a.weighted_total; + bVal = b.weighted_total; + } else { + aVal = a.scores[sortKey] ?? 0; + bVal = b.scores[sortKey] ?? 0; + } + return sortAsc ? aVal - bVal : bVal - aVal; + }); + + // Collect all score keys + const scoreKeys = Array.from( + new Set(rows.flatMap((r) => Object.keys(r.scores))), + ); + + return ( +
+ + + + + + {scoreKeys.map((k) => ( + + ))} + + + + + + {sortedRows.map((row, idx) => ( + + + + {scoreKeys.map((k) => ( + + ))} + + + + ))} + {sortedRows.length === 0 && ( + + + + )} + +
#Config handleSort(k)} + > + {k} + {sortKey === k ? (sortAsc ? " \u25B2" : " \u25BC") : ""} + handleSort("weighted_total")} + > + Total + {sortKey === "weighted_total" + ? sortAsc + ? " \u25B2" + : " \u25BC" + : ""} + Status
+ {idx + 1} + + {row.config_summary} + + {row.scores[k] != null ? row.scores[k].toFixed(3) : "—"} + + {row.weighted_total.toFixed(3)} + + +
+ No runs yet +
+
+ ); +} + +function StatusBadge({ + status, + cached, +}: { + status: string; + cached: boolean; +}) { + if (cached) + return ( + + cached + + ); + if (status === "completed") + return ( + + completed + + ); + if (status === "running") + return ( + + running + + ); + if (status === "failed") + return ( + + failed + + ); + return ( + + {status} + + ); +} + +// --------------------------------------------------------------------------- +// Steering Controls +// --------------------------------------------------------------------------- + +function SteeringControls({ + experimentId, + experimentStatus, + progress, + onStatusChange, +}: { + experimentId: string; + experimentStatus: string; + progress: { completed: number; total: number; cache_hits: number; tokens_total: number; cost_total: number }; + onStatusChange: (status: string) => void; +}) { + const [confirming, setConfirming] = useState<"pause" | "stop" | null>(null); + const [actionLoading, setActionLoading] = useState(false); + + async function handleAction(action: "pause" | "resume" | "stop") { + setActionLoading(true); + try { + if (action === "pause") { + await experiments.pause(experimentId); + onStatusChange("paused"); + } else if (action === "resume") { + await experiments.resume(experimentId); + onStatusChange("running"); + } else if (action === "stop") { + await experiments.stop(experimentId); + onStatusChange("stopped"); + } + } catch { + // Silently handle — status will sync via WS + } finally { + setActionLoading(false); + setConfirming(null); + } + } + + const isRunning = experimentStatus === "running" || experimentStatus === "sweeping"; + const isPaused = experimentStatus === "paused"; + const pct = + progress.total > 0 + ? Math.round((progress.completed / progress.total) * 100) + : 0; + const cacheRate = + progress.completed > 0 + ? Math.round((progress.cache_hits / progress.completed) * 100) + : 0; + + return ( +
+ {/* Action buttons */} +
+ {isRunning && confirming !== "pause" && ( + + )} + {isRunning && confirming === "pause" && ( +
+ + Pause sweep? + + + +
+ )} + + {isPaused && ( + + )} + + {(isRunning || isPaused) && confirming !== "stop" && ( + + )} + {(isRunning || isPaused) && confirming === "stop" && ( +
+ + Stop sweep? + + + +
+ )} +
+ + {/* Progress bar */} +
+
+ + {progress.completed} / {progress.total} runs + + {pct}% +
+
+
+
+
+ + {/* Stats */} +
+
+
Tokens
+
+ {progress.tokens_total.toLocaleString()} +
+
+
+
Est. Cost
+
+ ${progress.cost_total.toFixed(4)} +
+
+
+
Cache Rate
+
+ {cacheRate}% +
+
+
+
Status
+
+ {experimentStatus} +
+
+
+
+ ); +} + +// --------------------------------------------------------------------------- +// Connection Status Indicator +// --------------------------------------------------------------------------- + +/* Shows the connection state as a text label: "Live" when status is "connected", "Connecting…" when it is "connecting", and "Disconnected" otherwise. `colors` maps each status to CSS class names; the markup that consumes it is not visible in this view. */ +function ConnectionIndicator({ status }: { status: ConnectionStatus }) { + const colors: Record = { + connected: "bg-green-500", + connecting: "bg-amber-500 animate-pulse", + disconnected: "bg-red-500", + }; + + return ( + + + {status === "connected" + ? "Live" + : status === "connecting" + ? "Connecting…" + : "Disconnected"} + + ); +} + +// --------------------------------------------------------------------------- +// Live Page +// --------------------------------------------------------------------------- + +/* Live dashboard page for the experiment whose id comes from the route params. It fetches the experiment and an initial leaderboard, and once loading has finished without an error it opens a WebSocket connection and subscribes to that experiment. Incoming events are folded into the timeline, leaderboard and progress state. Renders a loading view, an error view, or the dashboard. */ +export default function LivePage() { + const { id } = useParams<{ id: string }>(); + const navigate = useNavigate(); + + // Experiment state + const [experiment, setExperiment] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [expStatus, setExpStatus] = useState("idle"); + + // WebSocket state + const [connectionStatus, setConnectionStatus] = + useState("disconnected"); + const wsRef = useRef(null); + const reconnectTimerRef = useRef | null>(null); + const reconnectAttemptRef = useRef(0); + + // Timeline + const [timeline, setTimeline] = useState([]); + const [autoScroll, setAutoScroll] = useState(true); + const [eventFilter, setEventFilter] = useState("all"); + const timelineEndRef = useRef(null); + + // Leaderboard + const [leaderboard, setLeaderboard] = useState([]); + const [bestRunId, setBestRunId] = useState(null); + + // Progress + const [progress, setProgress] = useState({ + completed: 0, + total: 0, + cache_hits: 0, + tokens_total: 0, + cost_total: 0, + }); + + // ------------------------------------------------------------------------- + // Load experiment + // ------------------------------------------------------------------------- + + const loadExperiment = useCallback(async 
() => { + if (!id) return; + setLoading(true); + setError(null); + try { + const exp = await experiments.get(id); + setExperiment(exp); + setExpStatus(exp.status); + } catch (err: unknown) { + if (err instanceof ApiError) { + setError(`Failed to load experiment (${err.status}).`); + } else { + setError("Network error. Is the server running?"); + } + } finally { + setLoading(false); + } + }, [id]); + + // ------------------------------------------------------------------------- + // Load initial leaderboard + // ------------------------------------------------------------------------- + + /* Seeds the leaderboard from the REST endpoint. Rows created here carry empty scores and a weighted_total of 0 until a run.completed or new_best_found event for the same run_id replaces them. The first returned item is taken as the best run. NOTE(review): presumably the endpoint returns runs best-first; confirm, and consider mapping real scores if RunResponse exposes them. Failures are swallowed on purpose. */ + const loadLeaderboard = useCallback(async () => { + if (!id) return; + try { + const resp = await runsApi.leaderboard(id); + const rows: LeaderboardRow[] = resp.items.map((r: RunResponse) => ({ + run_id: r.id, + config_summary: configSummary(r.config), + scores: {}, + weighted_total: 0, + status: r.status, + cached: false, + })); + setLeaderboard(rows); + if (rows.length > 0) { + setBestRunId(rows[0].run_id); + } + } catch { + // Non-critical — leaderboard will populate via WebSocket + } + }, [id]); + + useEffect(() => { + loadExperiment(); + loadLeaderboard(); + }, [loadExperiment, loadLeaderboard]); + + // ------------------------------------------------------------------------- + // Process incoming WS events + // ------------------------------------------------------------------------- + + /* Turns one WebSocket message into state updates: a timeline entry for every handled event, a leaderboard upsert for run.completed / new_best_found, an insert-if-absent for cache_hit, and a progress replacement whenever the event carries `progress`. Events with no type, with type "ack", or with an experiment_id different from the route id are ignored. NOTE(review): `raw` is cast to WsEvent without runtime validation. */ + const processEvent = useCallback( + (raw: unknown) => { + const evt = raw as WsEvent; + if (!evt.type || evt.type === "ack") return; + + // Filter events not for this experiment (if the server sends global events) + if (evt.experiment_id && evt.experiment_id !== id) return; + + const now = new Date(evt.timestamp ?? 
Date.now()); + + // Build timeline entry + let message = ""; + let detail: string | undefined; + + switch (evt.type) { + case "run.started": + message = `Run started: ${configSummary(evt.config)}`; + break; + case "run.completed": + message = `Run completed: ${configSummary(evt.config)}`; + detail = + evt.weighted_total != null + ? `Score: ${evt.weighted_total.toFixed(3)}` + : undefined; + break; + case "new_best_found": + message = `New best config found!`; + detail = + evt.weighted_total != null + ? `Score: ${evt.weighted_total.toFixed(3)}` + : undefined; + break; + case "cache_hit": + message = `Cache hit: ${configSummary(evt.config)}`; + break; + case "run.failed": + message = `Run failed: ${evt.error ?? "unknown error"}`; + break; + case "sweep.progress": + message = `Progress: ${evt.progress?.completed ?? 0}/${evt.progress?.total ?? 0} runs`; + break; + case "sweep.completed": + message = "Sweep completed!"; + setExpStatus("completed"); + break; + default: + message = evt.type; + } + + /* Every handled event appends one entry; nothing in this callback trims the array. NOTE(review): consider capping the timeline length for long-running sweeps. */ + setTimeline((prev) => [ + ...prev, + { + id: nextTimelineId(), + type: evt.type, + run_id: evt.run_id, + message, + detail, + timestamp: now, + }, + ]); + + // Update leaderboard + if ( + (evt.type === "run.completed" || evt.type === "new_best_found") && + evt.run_id + ) { + const row: LeaderboardRow = { + run_id: evt.run_id, + config_summary: configSummary(evt.config), + scores: evt.scores ?? {}, + weighted_total: evt.weighted_total ?? 0, + status: "completed", + cached: evt.cached ?? false, + }; + + setLeaderboard((prev) => { + const existing = prev.findIndex((r) => r.run_id === row.run_id); + if (existing >= 0) { + const next = [...prev]; + next[existing] = row; + return next; + } + return [...prev, row]; + }); + + if (evt.type === "new_best_found") { + setBestRunId(evt.run_id); + } + } + + if (evt.type === "cache_hit" && evt.run_id) { + const row: LeaderboardRow = { + run_id: evt.run_id, + config_summary: configSummary(evt.config), + scores: evt.scores ?? 
{}, + weighted_total: evt.weighted_total ?? 0, + status: "completed", + cached: true, + }; + setLeaderboard((prev) => { + const existing = prev.findIndex((r) => r.run_id === row.run_id); + if (existing >= 0) return prev; + return [...prev, row]; + }); + } + + // Update progress + if (evt.progress) { + setProgress(evt.progress); + } + }, + [id], + ); + + // ------------------------------------------------------------------------- + // WebSocket connection with exponential backoff reconnect + // ------------------------------------------------------------------------- + + /* Opens a connection via connectWebSocket, stores it in wsRef and sends a subscribe message for the current experiment id. The close callback marks the page disconnected, clears wsRef and schedules another connectWs() after min(1000 * 2^attempt, 30000) ms. NOTE(review): if close() on the previous connection invokes that connection's close callback, the callback will null wsRef and schedule an extra reconnect; confirm connectWebSocket's close semantics. */ + const connectWs = useCallback(() => { + if (wsRef.current) { + wsRef.current.close(); + } + + setConnectionStatus("connecting"); + + const conn = connectWebSocket( + (data) => processEvent(data), + () => { + // onClose + setConnectionStatus("disconnected"); + wsRef.current = null; + + // Reconnect with exponential backoff + const attempt = reconnectAttemptRef.current; + const delay = Math.min(1000 * Math.pow(2, attempt), 30000); + reconnectAttemptRef.current = attempt + 1; + + reconnectTimerRef.current = setTimeout(() => { + connectWs(); + }, delay); + }, + ); + + /* NOTE(review): the status is set to "connected" and the attempt counter is reset to 0 synchronously, right after connectWebSocket returns; no open confirmation is visible here. Because every retry passes through these lines, the counter is zeroed on each attempt, so the delay computed in the close callback appears to stay at about the 1000 ms base instead of growing. Consider resetting only once the socket is confirmed open. */ + wsRef.current = conn; + setConnectionStatus("connected"); + reconnectAttemptRef.current = 0; + + // Subscribe to experiment events + /* NOTE(review): send() is called immediately after creation; presumably connectWebSocket queues or tolerates sends before the socket is open. Verify. */ + if (id) { + conn.send({ type: "subscribe", experiment_id: id }); + } + }, [id, processEvent]); + + /* Connects only when an id is present and loading has finished without an error. NOTE(review): the cleanup clears the pending reconnect timer and then closes the socket; if that close() triggers the close callback, a new reconnect timer would be scheduled after cleanup. Confirm, and consider a "disposed" flag. */ + useEffect(() => { + if (!id || loading || error) return; + + connectWs(); + + return () => { + if (reconnectTimerRef.current) { + clearTimeout(reconnectTimerRef.current); + } + if (wsRef.current) { + wsRef.current.close(); + wsRef.current = null; + } + }; + }, [id, loading, error, connectWs]); + + // ------------------------------------------------------------------------- + // Auto-scroll timeline + // ------------------------------------------------------------------------- + + useEffect(() => { + if (autoScroll && timelineEndRef.current) { + timelineEndRef.current.scrollIntoView?.({ behavior: "smooth" }); + } 
+ }, [timeline, autoScroll]); + + // ------------------------------------------------------------------------- + // Filter timeline + // ------------------------------------------------------------------------- + + const filteredTimeline = + eventFilter === "all" + ? timeline + : timeline.filter((e) => e.type === eventFilter); + + // ------------------------------------------------------------------------- + // Render + // ------------------------------------------------------------------------- + + if (loading) { + return ( +
+

+ Loading experiment… +

+
+ ); + } + + if (error) { + return ( +
+
+
+

{error}

+ +
+
+
+ ); + } + + return ( +
+
+ {/* Header */} +
+
+
+ + ← Experiment + + +
+

+ {experiment?.name ?? "Live Dashboard"} +

+ {experiment?.description && ( +

+ {experiment.description} +

+ )} +
+
+ + {/* Main layout: 60/40 split */} +
+ {/* Left column — 60% */} +
+ {/* Activity Timeline */} +
+
+

+ Activity Timeline +

+
+ {/* Filter */} + + + {/* Pause scroll */} + +
+
+ +
+ {filteredTimeline.length === 0 && ( +

+ {timeline.length === 0 + ? "Waiting for events…" + : "No matching events"} +

+ )} + {filteredTimeline.map((entry) => ( + + ))} +
+
+
+
+ + {/* Right column — 40% */} +
+ {/* Steering Controls */} +
+

+ Controls +

+ +
+ + {/* Leaderboard */} +
+
+

+ Leaderboard +

+
+
+ +
+
+
+
+
); }