MAESTRO: Implement Live Observability page with real-time WebSocket dashboard

Full LivePage implementation with 60/40 split layout:
- Left column: Activity Timeline with color-coded event cards (run.started, run.completed, new_best_found, cache_hit, run.failed), event type filtering, and auto-scroll toggle
- Right column: Leaderboard table with sortable columns, best-run highlighting, and status badges; Steering Controls with pause/resume/stop (with confirmation dialogs), progress bar, token counter, cost estimate, and cache hit rate
- WebSocket integration with exponential backoff reconnect, connection status indicator, and experiment subscription
- 35 tests covering loading/error states, WebSocket events, timeline filtering, leaderboard updates, progress tracking, and steering control interactions
This commit is contained in:
John Lightner 2026-04-07 03:06:16 -05:00
parent fb78eac1b0
commit 16c56b13f2
3 changed files with 1686 additions and 5 deletions

View file

@ -78,7 +78,7 @@ describe("App routing", () => {
it("renders LivePage at /live/:id", async () => {
renderWithRouter("/live/abc-123");
await waitFor(() => {
expect(screen.getByText("Live")).toBeInTheDocument();
expect(screen.getByText("Loading experiment…")).toBeInTheDocument();
});
});

View file

@ -0,0 +1,743 @@
import { render, screen, waitFor, within, act } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { MemoryRouter } from "react-router-dom";
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import LivePage from "./LivePage";
import * as client from "../api/client";
import type { WsConnection, WsMessageHandler } from "../api/client";
// ---------------------------------------------------------------------------
// Mocks
// ---------------------------------------------------------------------------
// Shared spy handed out by the stubbed useNavigate hook below.
const mockNavigate = vi.fn();
// Partial mock of react-router-dom: every real export is kept, but the two hooks
// are replaced so each render sees a fixed route param (id = "exp-1") and the
// shared navigate spy.
vi.mock("react-router-dom", async () => {
const actual = await vi.importActual("react-router-dom");
return {
...actual,
useNavigate: () => mockNavigate,
useParams: () => ({ id: "exp-1" }),
};
});
// Experiment fixture resolved by the default `experiments.get` spy.
// Its id matches the stubbed route param and its status is "running";
// individual tests spread it to override `status`.
const MOCK_EXPERIMENT: client.ExperimentResponse = {
id: "exp-1",
project_id: "proj-1",
name: "Test Experiment",
description: "Testing the live page",
sample_data: null,
pipeline_stages: null,
scoring_config: null,
parameter_space: null,
status: "running",
created_at: "2026-04-07T10:00:00Z",
updated_at: "2026-04-07T10:00:00Z",
};
// Callbacks captured from the mocked connectWebSocket call so tests can push
// simulated server events (onMessage) into the page. Both are reset to null
// before every test.
let capturedOnMessage: WsMessageHandler | null = null;
let capturedOnClose: (() => void) | null = null;
// Spies backing the `send` / `close` methods of the fake connection object.
const mockWsSend = vi.fn();
const mockWsClose = vi.fn();
/** Mounts <LivePage /> inside a MemoryRouter whose only history entry is /live/exp-1. */
function renderLive() {
  const tree = (
    <MemoryRouter initialEntries={["/live/exp-1"]}>
      <LivePage />
    </MemoryRouter>
  );
  return render(tree);
}
describe("LivePage", () => {
// Reset every spy and captured callback, then install the default happy-path mocks.
beforeEach(() => {
vi.restoreAllMocks();
mockNavigate.mockReset();
mockWsSend.mockReset();
mockWsClose.mockReset();
capturedOnMessage = null;
capturedOnClose = null;
// Default mocks: the experiment loads successfully and the leaderboard starts empty.
vi.spyOn(client.experiments, "get").mockResolvedValue(MOCK_EXPERIMENT);
vi.spyOn(client.runs, "leaderboard").mockResolvedValue({
items: [],
total: 0,
});
// Fake connection: records the handlers the page passes in and routes
// send/close to the module-level spies.
vi.spyOn(client, "connectWebSocket").mockImplementation(
(onMessage: WsMessageHandler, onClose?: () => void): WsConnection => {
capturedOnMessage = onMessage;
capturedOnClose = onClose ?? null;
return {
send: mockWsSend,
close: mockWsClose,
};
},
);
});
// Undo every spy installed during the test.
afterEach(() => {
vi.restoreAllMocks();
});
// -------------------------------------------------------------------------
// Loading & error states
// -------------------------------------------------------------------------
// A fetch that never settles keeps the page on its loading message.
it("shows loading state initially", () => {
vi.spyOn(client.experiments, "get").mockImplementation(
() => new Promise(() => {}),
);
renderLive();
expect(screen.getByText("Loading experiment…")).toBeInTheDocument();
});
// An ApiError rejection is reported in the alert together with its status code.
it("shows error state on API failure", async () => {
vi.spyOn(client.experiments, "get").mockRejectedValue(
new client.ApiError(404, "Not Found", { detail: "Not found" }),
);
renderLive();
await waitFor(() => {
expect(screen.getByRole("alert")).toHaveTextContent(
"Failed to load experiment (404)",
);
});
});
// A plain Error rejection is reported as a network error.
it("shows network error on fetch failure", async () => {
vi.spyOn(client.experiments, "get").mockRejectedValue(
new Error("fetch failed"),
);
renderLive();
await waitFor(() => {
expect(screen.getByRole("alert")).toHaveTextContent("Network error");
});
});
// First call rejects, second resolves: clicking Retry must issue the second call
// and render the experiment.
it("has a retry button on error", async () => {
const getSpy = vi
.spyOn(client.experiments, "get")
.mockRejectedValueOnce(new Error("fail"))
.mockResolvedValueOnce(MOCK_EXPERIMENT);
renderLive();
await waitFor(() => {
expect(screen.getByText("Retry")).toBeInTheDocument();
});
const user = userEvent.setup();
await user.click(screen.getByText("Retry"));
await waitFor(() => {
expect(screen.getByText("Test Experiment")).toBeInTheDocument();
});
expect(getSpy).toHaveBeenCalledTimes(2);
});
// -------------------------------------------------------------------------
// Renders main layout
// -------------------------------------------------------------------------
// Header shows the fixture's name and description once loading finishes.
it("renders experiment name and description after loading", async () => {
renderLive();
await waitFor(() => {
expect(screen.getByText("Test Experiment")).toBeInTheDocument();
});
expect(screen.getByText("Testing the live page")).toBeInTheDocument();
});
// With no events received yet, the timeline shows its empty-state message.
it("renders activity timeline section", async () => {
renderLive();
await waitFor(() => {
expect(screen.getByText("Activity Timeline")).toBeInTheDocument();
});
expect(screen.getByText("Waiting for events…")).toBeInTheDocument();
});
// The leaderboard heading and table are both present.
it("renders leaderboard section", async () => {
renderLive();
await waitFor(() => {
expect(screen.getByText("Leaderboard")).toBeInTheDocument();
});
expect(screen.getByTestId("leaderboard-table")).toBeInTheDocument();
});
// The steering controls container is located by its test id.
it("renders steering controls section", async () => {
renderLive();
await waitFor(() => {
expect(screen.getByTestId("steering-controls")).toBeInTheDocument();
});
});
// The connection status dot is located by its test id.
it("renders connection indicator", async () => {
renderLive();
await waitFor(() => {
expect(screen.getByTestId("connection-indicator")).toBeInTheDocument();
});
});
// -------------------------------------------------------------------------
// WebSocket connection
// -------------------------------------------------------------------------
// The page must open a connection through client.connectWebSocket.
it("connects WebSocket after experiment loads", async () => {
renderLive();
await waitFor(() => {
expect(client.connectWebSocket).toHaveBeenCalled();
});
});
// The page subscribes to the experiment taken from the (stubbed) route param.
it("sends subscribe message with experiment_id", async () => {
renderLive();
await waitFor(() => {
expect(mockWsSend).toHaveBeenCalledWith({
type: "subscribe",
experiment_id: "exp-1",
});
});
});
// Unmounting must call close() on the connection returned by connectWebSocket.
it("closes WebSocket on unmount", async () => {
const { unmount } = renderLive();
await waitFor(() => {
expect(client.connectWebSocket).toHaveBeenCalled();
});
unmount();
expect(mockWsClose).toHaveBeenCalled();
});
// -------------------------------------------------------------------------
// Timeline events
// -------------------------------------------------------------------------
// Each test below waits until the page has registered its message handler,
// pushes one simulated event through it inside act(), then inspects the timeline.

// run.started: one card labelled "Run Started" that mentions the model name.
it("displays run.started event in timeline", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "run.started",
experiment_id: "exp-1",
run_id: "r1",
config: { model: "gpt-4", temperature: 0.7 },
timestamp: "2026-04-07T10:01:00Z",
});
});
await waitFor(() => {
const entries = screen.getAllByTestId("timeline-entry");
expect(entries).toHaveLength(1);
expect(entries[0]).toHaveTextContent("Run Started");
expect(entries[0]).toHaveTextContent("gpt-4");
});
});
// run.completed: the card shows the weighted total formatted to three decimals.
it("displays run.completed event in timeline", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "run.completed",
experiment_id: "exp-1",
run_id: "r1",
config: { model: "gpt-4", temperature: 0.7 },
scores: { length: 0.8 },
weighted_total: 0.8,
timestamp: "2026-04-07T10:02:00Z",
});
});
await waitFor(() => {
const entries = screen.getAllByTestId("timeline-entry");
expect(entries).toHaveLength(1);
expect(entries[0]).toHaveTextContent("Run Completed");
expect(entries[0]).toHaveTextContent("Score: 0.800");
});
});
// new_best_found: the card carries the event type as a data attribute and the "New Best!" label.
it("displays new_best_found event in timeline", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "new_best_found",
experiment_id: "exp-1",
run_id: "r1",
config: { model: "gpt-4" },
weighted_total: 0.95,
timestamp: "2026-04-07T10:02:00Z",
});
});
await waitFor(() => {
const entries = screen.getAllByTestId("timeline-entry");
expect(entries[0]).toHaveAttribute("data-event-type", "new_best_found");
expect(entries[0]).toHaveTextContent("New Best!");
});
});
// run.failed: the card shows the error string from the event.
it("displays run.failed event in timeline", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "run.failed",
experiment_id: "exp-1",
run_id: "r1",
error: "Connection timeout",
timestamp: "2026-04-07T10:03:00Z",
});
});
await waitFor(() => {
const entries = screen.getAllByTestId("timeline-entry");
expect(entries[0]).toHaveTextContent("Run Failed");
expect(entries[0]).toHaveTextContent("Connection timeout");
});
});
// cache_hit: the card is labelled "Cache Hit".
it("displays cache_hit event in timeline", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "cache_hit",
experiment_id: "exp-1",
run_id: "r2",
config: { model: "gpt-3.5" },
timestamp: "2026-04-07T10:02:00Z",
});
});
await waitFor(() => {
const entries = screen.getAllByTestId("timeline-entry");
expect(entries[0]).toHaveTextContent("Cache Hit");
});
});
// ack messages must not create a timeline entry: the empty-state text stays.
it("ignores ack events", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({ type: "ack", data: {} });
});
expect(screen.getByText("Waiting for events…")).toBeInTheDocument();
});
// Events whose experiment_id differs from the page's experiment are dropped.
it("ignores events for other experiments", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "run.started",
experiment_id: "other-exp",
run_id: "r1",
config: { model: "gpt-4" },
});
});
expect(screen.getByText("Waiting for events…")).toBeInTheDocument();
});
// -------------------------------------------------------------------------
// Timeline filtering
// -------------------------------------------------------------------------
// Two events of different types are pushed; choosing "run.failed" in the filter
// select must leave only the failed entry visible.
it("filters timeline events by type", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "run.started",
experiment_id: "exp-1",
run_id: "r1",
config: { model: "gpt-4" },
});
capturedOnMessage!({
type: "run.failed",
experiment_id: "exp-1",
run_id: "r2",
error: "Timeout",
});
});
// Both entries are visible before any filter is applied.
await waitFor(() => {
expect(screen.getAllByTestId("timeline-entry")).toHaveLength(2);
});
const user = userEvent.setup();
const filter = screen.getByTestId("event-filter");
await user.selectOptions(filter, "run.failed");
const entries = screen.getAllByTestId("timeline-entry");
expect(entries).toHaveLength(1);
expect(entries[0]).toHaveTextContent("Run Failed");
});
// -------------------------------------------------------------------------
// Auto-scroll toggle
// -------------------------------------------------------------------------
// The toggle starts in the ON state and flips to OFF after one click.
it("toggles auto-scroll", async () => {
renderLive();
await waitFor(() => {
expect(
screen.getByTestId("toggle-autoscroll"),
).toBeInTheDocument();
});
expect(screen.getByTestId("toggle-autoscroll")).toHaveTextContent(
"Auto-scroll ON",
);
const user = userEvent.setup();
await user.click(screen.getByTestId("toggle-autoscroll"));
expect(screen.getByTestId("toggle-autoscroll")).toHaveTextContent(
"Auto-scroll OFF",
);
});
// -------------------------------------------------------------------------
// Leaderboard
// -------------------------------------------------------------------------
// A run.completed event adds one row showing the score to three decimals.
it("adds completed runs to leaderboard", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "run.completed",
experiment_id: "exp-1",
run_id: "r1",
config: { model: "gpt-4", temperature: 0.7 },
scores: { length: 0.85 },
weighted_total: 0.85,
});
});
await waitFor(() => {
const rows = screen.getAllByTestId("leaderboard-row");
expect(rows).toHaveLength(1);
expect(rows[0]).toHaveTextContent("0.850");
});
});
// A new_best_found event produces a row styled with the amber highlight classes.
it("highlights the best run", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "new_best_found",
experiment_id: "exp-1",
run_id: "r1",
config: { model: "gpt-4" },
scores: { length: 0.95 },
weighted_total: 0.95,
});
});
await waitFor(() => {
const rows = screen.getAllByTestId("leaderboard-row");
expect(rows[0].className).toContain("bg-amber");
});
});
// The default leaderboard mock returns no items, so the empty-state row is shown.
it("shows 'No runs yet' when leaderboard is empty", async () => {
renderLive();
await waitFor(() => {
expect(screen.getByText("No runs yet")).toBeInTheDocument();
});
});
// -------------------------------------------------------------------------
// Progress updates
// -------------------------------------------------------------------------
// A sweep.progress payload drives the run counter, percentage, token count and cost text.
it("updates progress from sweep.progress events", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "sweep.progress",
experiment_id: "exp-1",
progress: {
completed: 5,
total: 20,
cache_hits: 2,
tokens_total: 15000,
cost_total: 0.0345,
},
});
});
await waitFor(() => {
expect(screen.getByText("5 / 20 runs")).toBeInTheDocument();
expect(screen.getByText("25%")).toBeInTheDocument();
expect(screen.getByText("15,000")).toBeInTheDocument();
expect(screen.getByText("$0.0345")).toBeInTheDocument();
});
});
// 10 of 20 runs completed must set the fill element's inline width to 50%.
it("updates progress bar width", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "sweep.progress",
experiment_id: "exp-1",
progress: {
completed: 10,
total: 20,
cache_hits: 0,
tokens_total: 0,
cost_total: 0,
},
});
});
await waitFor(() => {
const bar = screen.getByTestId("progress-bar-fill");
expect(bar.style.width).toBe("50%");
});
});
// -------------------------------------------------------------------------
// Steering controls
// -------------------------------------------------------------------------
// The default fixture has status "running", so Pause and Stop are offered.
it("shows pause and stop buttons when running", async () => {
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Pause" }),
).toBeInTheDocument();
expect(
screen.getByRole("button", { name: "Stop" }),
).toBeInTheDocument();
});
});
// Clicking Pause only opens the confirmation prompt with a Confirm button.
it("shows confirmation before pausing", async () => {
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Pause" }),
).toBeInTheDocument();
});
const user = userEvent.setup();
await user.click(screen.getByRole("button", { name: "Pause" }));
expect(screen.getByText("Pause sweep?")).toBeInTheDocument();
expect(
screen.getByRole("button", { name: "Confirm" }),
).toBeInTheDocument();
});
// Clicking Stop only opens the confirmation prompt.
it("shows confirmation before stopping", async () => {
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Stop" }),
).toBeInTheDocument();
});
const user = userEvent.setup();
await user.click(screen.getByRole("button", { name: "Stop" }));
expect(screen.getByText("Stop sweep?")).toBeInTheDocument();
});
// Pause followed by Confirm must call experiments.pause with the experiment id.
it("calls pause API on confirm", async () => {
const pauseSpy = vi
.spyOn(client.experiments, "pause")
.mockResolvedValue(undefined as unknown as void);
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Pause" }),
).toBeInTheDocument();
});
const user = userEvent.setup();
await user.click(screen.getByRole("button", { name: "Pause" }));
await user.click(screen.getByRole("button", { name: "Confirm" }));
await waitFor(() => {
expect(pauseSpy).toHaveBeenCalledWith("exp-1");
});
});
// Overriding the fixture status to "paused" must surface the Resume button.
it("shows resume button when paused", async () => {
vi.spyOn(client.experiments, "get").mockResolvedValue({
...MOCK_EXPERIMENT,
status: "paused",
});
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Resume" }),
).toBeInTheDocument();
});
});
// Resume has no confirmation step: a single click must call experiments.resume.
it("calls resume API", async () => {
vi.spyOn(client.experiments, "get").mockResolvedValue({
...MOCK_EXPERIMENT,
status: "paused",
});
const resumeSpy = vi
.spyOn(client.experiments, "resume")
.mockResolvedValue(undefined as unknown as void);
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Resume" }),
).toBeInTheDocument();
});
const user = userEvent.setup();
await user.click(screen.getByRole("button", { name: "Resume" }));
await waitFor(() => {
expect(resumeSpy).toHaveBeenCalledWith("exp-1");
});
});
// Stop followed by Confirm must call experiments.stop with the experiment id.
it("calls stop API on confirm", async () => {
const stopSpy = vi
.spyOn(client.experiments, "stop")
.mockResolvedValue(undefined as unknown as void);
renderLive();
await waitFor(() => {
expect(
screen.getByRole("button", { name: "Stop" }),
).toBeInTheDocument();
});
const user = userEvent.setup();
await user.click(screen.getByRole("button", { name: "Stop" }));
await user.click(screen.getByRole("button", { name: "Confirm" }));
await waitFor(() => {
expect(stopSpy).toHaveBeenCalledWith("exp-1");
});
});
// -------------------------------------------------------------------------
// sweep.completed sets status
// -------------------------------------------------------------------------
// After a sweep.completed event the Pause and Stop buttons must disappear.
it("updates status to completed on sweep.completed", async () => {
renderLive();
await waitFor(() => {
expect(capturedOnMessage).not.toBeNull();
});
act(() => {
capturedOnMessage!({
type: "sweep.completed",
experiment_id: "exp-1",
});
});
await waitFor(() => {
// Pause/Resume/Stop buttons should not be shown for completed status
expect(
screen.queryByRole("button", { name: "Pause" }),
).not.toBeInTheDocument();
expect(
screen.queryByRole("button", { name: "Stop" }),
).not.toBeInTheDocument();
});
});
// -------------------------------------------------------------------------
// Link back to experiment
// -------------------------------------------------------------------------
// The "← Experiment" text must sit inside an anchor pointing at /experiments/<id>.
it("has a link back to the experiment page", async () => {
renderLive();
await waitFor(() => {
const link = screen.getByText("← Experiment");
expect(link).toBeInTheDocument();
expect(link.closest("a")).toHaveAttribute("href", "/experiments/exp-1");
});
});
});

View file

@ -1,8 +1,946 @@
export default function LivePage() {
import { useState, useEffect, useCallback, useRef } from "react";
import { useParams, useNavigate, Link } from "react-router-dom";
import {
experiments,
runs as runsApi,
connectWebSocket,
ApiError,
} from "../api/client";
import type {
ExperimentResponse,
RunResponse,
WsConnection,
} from "../api/client";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/** String-literal union of the values a `WsEvent`'s `type` field can take. */
export type WsEventType =
| "run.started"
| "run.completed"
| "run.failed"
| "new_best_found"
| "cache_hit"
| "sweep.progress"
| "sweep.completed"
| "ack";
/**
 * Shape of a WebSocket message. Only `type` is required; every other field is
 * optional, so consumers must check for presence before use.
 */
export interface WsEvent {
type: WsEventType;
experiment_id?: string;
run_id?: string;
config?: Record<string, unknown>;
// Per-metric scores keyed by metric name.
scores?: Record<string, number>;
weighted_total?: number;
cached?: boolean;
error?: string;
// Aggregate sweep counters; all five fields are required when `progress` is present.
progress?: {
completed: number;
total: number;
cache_hits: number;
tokens_total: number;
cost_total: number;
};
// NOTE(review): presumably an ISO-8601 string — confirm against the server.
timestamp?: string;
}
/** One activity-timeline item, rendered by `TimelineCard`. */
export interface TimelineEntry {
// NOTE(review): presumably produced by nextTimelineId() — confirm at the creation site.
id: string;
type: WsEventType;
run_id?: string;
// Main line of the card.
message: string;
// Optional secondary line, rendered only when non-empty.
detail?: string;
timestamp: Date;
}
/** One row of the leaderboard, rendered by `LeaderboardTable`. */
export interface LeaderboardRow {
// Used as the React key and compared with the best-run id for highlighting.
run_id: string;
config_summary: string;
// Keyed by score name; the union of keys across all rows becomes the score columns.
scores: Record<string, number>;
weighted_total: number;
status: string;
// When true, the status badge shows "cached" regardless of `status`.
cached: boolean;
}
/** WebSocket connection states displayed by `ConnectionIndicator`. */
type ConnectionStatus = "connecting" | "connected" | "disconnected";
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Module-wide sequence number backing nextTimelineId(); it is never reset.
let _timelineIdCounter = 0;

/** Returns the next id in the sequence "tl-1", "tl-2", … */
function nextTimelineId(): string {
  return `tl-${++_timelineIdCounter}`;
}
/**
 * Builds a short, human-readable label for a run configuration.
 *
 * @param config - Run configuration; may be missing.
 * @returns "—" when the config is missing or has no keys; otherwise the model
 *   name (`model`, falling back to `model_used`) and/or `t=<temperature>` joined
 *   by a space; otherwise the first 60 characters of the config's JSON.
 */
function configSummary(config?: Record<string, unknown>): string {
  // A missing config and an empty one carry the same (no) information, so both
  // get the placeholder instead of the empty one rendering as a literal "{}".
  if (!config || Object.keys(config).length === 0) return "—";
  const model = config.model ?? config.model_used ?? "";
  // `!= null` keeps a temperature of 0, which is a valid setting.
  const temp = config.temperature != null ? `t=${config.temperature}` : "";
  const parts = [model, temp].filter(Boolean);
  return parts.length > 0 ? parts.join(" ") : JSON.stringify(config).slice(0, 60);
}
/** Formats a Date as a time-of-day string in the runtime's default locale, with two-digit hour, minute and second. */
function formatTime(date: Date): string {
  const twoDigitClock: Intl.DateTimeFormatOptions = {
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
  };
  // An empty locale list selects the runtime's default locale.
  return date.toLocaleTimeString([], twoDigitClock);
}
// CSS class strings (background, text and border, each with a `dark:` variant)
// applied to a timeline card, keyed by event type. There is no "ack" entry;
// TimelineCard falls back to the "run.started" classes for unmapped types.
const EVENT_COLORS: Record<string, string> = {
"run.started": "bg-blue-100 dark:bg-blue-900/40 text-blue-700 dark:text-blue-300 border-blue-200 dark:border-blue-800",
"run.completed": "bg-green-100 dark:bg-green-900/40 text-green-700 dark:text-green-300 border-green-200 dark:border-green-800",
"new_best_found": "bg-amber-100 dark:bg-amber-900/40 text-amber-700 dark:text-amber-300 border-amber-200 dark:border-amber-800",
"cache_hit": "bg-slate-100 dark:bg-slate-700/40 text-slate-600 dark:text-slate-400 border-slate-200 dark:border-slate-600",
"run.failed": "bg-red-100 dark:bg-red-900/40 text-red-700 dark:text-red-300 border-red-200 dark:border-red-800",
"sweep.progress": "bg-indigo-100 dark:bg-indigo-900/40 text-indigo-700 dark:text-indigo-300 border-indigo-200 dark:border-indigo-800",
"sweep.completed": "bg-emerald-100 dark:bg-emerald-900/40 text-emerald-700 dark:text-emerald-300 border-emerald-200 dark:border-emerald-800",
};
// Human-readable heading shown on a timeline card, keyed by event type.
// There is no "ack" entry; TimelineCard shows the raw type string for unmapped types.
const EVENT_LABELS: Record<string, string> = {
"run.started": "Run Started",
"run.completed": "Run Completed",
"new_best_found": "New Best!",
"cache_hit": "Cache Hit",
"run.failed": "Run Failed",
"sweep.progress": "Progress",
"sweep.completed": "Sweep Done",
};
// ---------------------------------------------------------------------------
// Timeline Event Card
// ---------------------------------------------------------------------------
/**
 * Renders a single timeline entry as a colour-coded card.
 *
 * The card shows the event label and formatted timestamp on the first row, the
 * entry message below it, and the optional detail line when one is present.
 * The root element carries `data-testid="timeline-entry"` and the event type in
 * `data-event-type`.
 *
 * The three leftover lines of the previous placeholder page (an unclosed
 * `<div className="p-8">` plus its heading and paragraph) have been removed:
 * they left the returned JSX without a single, properly closed root element.
 *
 * @param entry - The timeline entry to display.
 */
function TimelineCard({ entry }: { entry: TimelineEntry }) {
  // Event types without a colour mapping reuse the "run.started" palette.
  const colorClass = EVENT_COLORS[entry.type] ?? EVENT_COLORS["run.started"];
  return (
    <div
      data-testid="timeline-entry"
      data-event-type={entry.type}
      className={`rounded-lg border p-3 ${colorClass} transition-all duration-300 animate-in`}
    >
      <div className="flex items-center justify-between mb-1">
        <span className="text-xs font-semibold uppercase tracking-wide">
          {/* Unmapped types show their raw identifier. */}
          {EVENT_LABELS[entry.type] ?? entry.type}
        </span>
        <span className="text-xs opacity-70">{formatTime(entry.timestamp)}</span>
      </div>
      <p className="text-sm">{entry.message}</p>
      {entry.detail && (
        <p className="mt-1 text-xs opacity-70">{entry.detail}</p>
      )}
    </div>
  );
}
// ---------------------------------------------------------------------------
// Leaderboard Table
// ---------------------------------------------------------------------------
/**
 * Sortable leaderboard of runs.
 *
 * Columns are: rank (#), config summary, one column per score key found in any
 * row, the weighted total, and a status badge. Clicking a score or Total header
 * sorts by that column; clicking the active header again flips the direction.
 * The initial sort is weighted total, descending.
 *
 * @param rows - Rows to display; the array itself is never mutated.
 * @param bestRunId - `run_id` of the row to highlight, or null for none.
 */
function LeaderboardTable({
  rows,
  bestRunId,
}: {
  rows: LeaderboardRow[];
  bestRunId: string | null;
}) {
  const [sortKey, setSortKey] = useState<string>("weighted_total");
  const [sortAsc, setSortAsc] = useState(false);

  function handleSort(key: string) {
    if (key === sortKey) {
      // Functional update so rapid successive clicks toggle from the latest value.
      setSortAsc((prev) => !prev);
    } else {
      // A newly selected column always starts descending (best first).
      setSortKey(key);
      setSortAsc(false);
    }
  }

  // Sort a copy so the caller's array keeps its order.
  const sortedRows = [...rows].sort((a, b) => {
    let aVal: number;
    let bVal: number;
    if (sortKey === "weighted_total") {
      aVal = a.weighted_total;
      bVal = b.weighted_total;
    } else {
      // Rows lacking the selected score are treated as 0.
      aVal = a.scores[sortKey] ?? 0;
      bVal = b.scores[sortKey] ?? 0;
    }
    return sortAsc ? aVal - bVal : bVal - aVal;
  });

  // Union of score keys across all rows, in first-seen order.
  const scoreKeys = Array.from(
    new Set(rows.flatMap((r) => Object.keys(r.scores))),
  );

  // Fixed columns besides the dynamic score columns: #, Config, Total, Status.
  const fixedColumnCount = 4;

  return (
    <div className="overflow-x-auto rounded-lg border border-slate-200 dark:border-slate-700">
      <table className="w-full text-sm" data-testid="leaderboard-table">
        <thead>
          <tr className="bg-slate-50 dark:bg-slate-800 text-left text-xs font-medium text-slate-500 dark:text-slate-400 uppercase tracking-wider">
            <th className="px-3 py-2">#</th>
            <th className="px-3 py-2">Config</th>
            {scoreKeys.map((k) => (
              <th
                key={k}
                className="px-3 py-2 cursor-pointer hover:text-indigo-600 dark:hover:text-indigo-400 select-none"
                onClick={() => handleSort(k)}
              >
                {k}
                {sortKey === k ? (sortAsc ? " \u25B2" : " \u25BC") : ""}
              </th>
            ))}
            <th
              className="px-3 py-2 cursor-pointer hover:text-indigo-600 dark:hover:text-indigo-400 select-none"
              onClick={() => handleSort("weighted_total")}
            >
              Total
              {sortKey === "weighted_total"
                ? sortAsc
                  ? " \u25B2"
                  : " \u25BC"
                : ""}
            </th>
            <th className="px-3 py-2">Status</th>
          </tr>
        </thead>
        <tbody>
          {sortedRows.map((row, idx) => (
            <tr
              key={row.run_id}
              data-testid="leaderboard-row"
              className={`border-t border-slate-100 dark:border-slate-700 transition-colors ${
                row.run_id === bestRunId
                  ? "bg-amber-50 dark:bg-amber-900/20 ring-1 ring-amber-300 dark:ring-amber-700"
                  : "hover:bg-slate-50 dark:hover:bg-slate-800/50"
              }`}
            >
              <td className="px-3 py-2 font-medium text-slate-700 dark:text-slate-300">
                {idx + 1}
              </td>
              <td className="px-3 py-2 text-slate-600 dark:text-slate-400 max-w-[180px] truncate">
                {row.config_summary}
              </td>
              {scoreKeys.map((k) => (
                <td
                  key={k}
                  className="px-3 py-2 tabular-nums text-slate-700 dark:text-slate-300"
                >
                  {row.scores[k] != null ? row.scores[k].toFixed(3) : "—"}
                </td>
              ))}
              <td className="px-3 py-2 font-semibold tabular-nums text-slate-900 dark:text-white">
                {row.weighted_total.toFixed(3)}
              </td>
              <td className="px-3 py-2">
                <StatusBadge status={row.status} cached={row.cached} />
              </td>
            </tr>
          ))}
          {sortedRows.length === 0 && (
            <tr>
              {/* Span every column, including Status (previously one short). */}
              <td
                colSpan={fixedColumnCount + scoreKeys.length}
                className="px-3 py-6 text-center text-slate-400 dark:text-slate-500"
              >
                No runs yet
              </td>
            </tr>
          )}
        </tbody>
      </table>
    </div>
  );
}
/**
 * Small pill showing a run's state.
 *
 * A cached run always shows a neutral "cached" pill. Otherwise the pill shows
 * the status text, coloured green / blue / red for "completed" / "running" /
 * "failed" and neutral for any other status.
 */
function StatusBadge({
  status,
  cached,
}: {
  status: string;
  cached: boolean;
}) {
  const neutralClasses =
    "inline-flex items-center rounded-full bg-slate-100 dark:bg-slate-700 px-2 py-0.5 text-xs text-slate-500 dark:text-slate-400";
  // A Map (rather than an object literal) so arbitrary status strings can never
  // hit an inherited property.
  const classesByStatus = new Map<string, string>([
    [
      "completed",
      "inline-flex items-center rounded-full bg-green-100 dark:bg-green-900/30 px-2 py-0.5 text-xs text-green-700 dark:text-green-400",
    ],
    [
      "running",
      "inline-flex items-center rounded-full bg-blue-100 dark:bg-blue-900/30 px-2 py-0.5 text-xs text-blue-700 dark:text-blue-400",
    ],
    [
      "failed",
      "inline-flex items-center rounded-full bg-red-100 dark:bg-red-900/30 px-2 py-0.5 text-xs text-red-700 dark:text-red-400",
    ],
  ]);

  // The cached flag takes precedence over whatever the status says.
  const label = cached ? "cached" : status;
  const pillClasses = cached
    ? neutralClasses
    : classesByStatus.get(status) ?? neutralClasses;

  return <span className={pillClasses}>{label}</span>;
}
// ---------------------------------------------------------------------------
// Steering Controls
// ---------------------------------------------------------------------------
/**
 * Pause / resume / stop controls plus progress and usage statistics for a sweep.
 *
 * Pause and Stop require a confirmation click; Resume acts immediately. After a
 * successful API call the new status ("paused", "running" or "stopped") is
 * reported through `onStatusChange`. Buttons are only offered while the
 * experiment status is "running"/"sweeping" (Pause, Stop) or "paused"
 * (Resume, Stop).
 *
 * @param experimentId - Id passed to the pause/resume/stop API calls.
 * @param experimentStatus - Current status; decides which buttons are shown.
 * @param progress - Aggregate counters used for the bar and the stat tiles.
 * @param onStatusChange - Called with the new status after a successful action.
 */
function SteeringControls({
  experimentId,
  experimentStatus,
  progress,
  onStatusChange,
}: {
  experimentId: string;
  experimentStatus: string;
  progress: { completed: number; total: number; cache_hits: number; tokens_total: number; cost_total: number };
  onStatusChange: (status: string) => void;
}) {
  const [confirming, setConfirming] = useState<"pause" | "stop" | null>(null);
  const [actionLoading, setActionLoading] = useState(false);

  /** Rounded percentage of `part` in `whole`, clamped to 0–100; 0 when `whole` is not positive. */
  function toPercent(part: number, whole: number): number {
    if (!(whole > 0)) return 0;
    // Clamp so inconsistent counters (e.g. completed > total) cannot push the
    // bar past its track or display a figure above 100%.
    return Math.min(100, Math.max(0, Math.round((part / whole) * 100)));
  }

  async function handleAction(action: "pause" | "resume" | "stop") {
    setActionLoading(true);
    try {
      if (action === "pause") {
        await experiments.pause(experimentId);
        onStatusChange("paused");
      } else if (action === "resume") {
        await experiments.resume(experimentId);
        onStatusChange("running");
      } else if (action === "stop") {
        await experiments.stop(experimentId);
        onStatusChange("stopped");
      }
    } catch (err: unknown) {
      // Best effort: the displayed status is left unchanged (the original note
      // says it is expected to re-sync via the WebSocket). Log the failure so
      // it is at least diagnosable instead of vanishing silently.
      console.error(`Failed to ${action} experiment ${experimentId}`, err);
    } finally {
      setActionLoading(false);
      setConfirming(null);
    }
  }

  const isRunning = experimentStatus === "running" || experimentStatus === "sweeping";
  const isPaused = experimentStatus === "paused";
  const pct = toPercent(progress.completed, progress.total);
  const cacheRate = toPercent(progress.cache_hits, progress.completed);

  return (
    <div data-testid="steering-controls" className="space-y-4">
      {/* Action buttons */}
      <div className="flex flex-wrap gap-2">
        {isRunning && confirming !== "pause" && (
          <button
            type="button"
            onClick={() => setConfirming("pause")}
            disabled={actionLoading}
            className="rounded-lg bg-amber-500 px-3 py-2 text-sm font-medium text-white hover:bg-amber-400 transition disabled:opacity-50"
          >
            Pause
          </button>
        )}
        {isRunning && confirming === "pause" && (
          <div className="flex items-center gap-2">
            <span className="text-sm text-amber-600 dark:text-amber-400">
              Pause sweep?
            </span>
            <button
              type="button"
              onClick={() => handleAction("pause")}
              disabled={actionLoading}
              className="rounded-lg bg-amber-500 px-3 py-1.5 text-xs font-medium text-white hover:bg-amber-400 transition disabled:opacity-50"
            >
              Confirm
            </button>
            <button
              type="button"
              onClick={() => setConfirming(null)}
              className="text-xs text-slate-500 hover:text-slate-700 dark:hover:text-slate-300"
            >
              Cancel
            </button>
          </div>
        )}
        {isPaused && (
          <button
            type="button"
            onClick={() => handleAction("resume")}
            disabled={actionLoading}
            className="rounded-lg bg-green-600 px-3 py-2 text-sm font-medium text-white hover:bg-green-500 transition disabled:opacity-50"
          >
            Resume
          </button>
        )}
        {(isRunning || isPaused) && confirming !== "stop" && (
          <button
            type="button"
            onClick={() => setConfirming("stop")}
            disabled={actionLoading}
            className="rounded-lg bg-red-600 px-3 py-2 text-sm font-medium text-white hover:bg-red-500 transition disabled:opacity-50"
          >
            Stop
          </button>
        )}
        {(isRunning || isPaused) && confirming === "stop" && (
          <div className="flex items-center gap-2">
            <span className="text-sm text-red-600 dark:text-red-400">
              Stop sweep?
            </span>
            <button
              type="button"
              onClick={() => handleAction("stop")}
              disabled={actionLoading}
              className="rounded-lg bg-red-600 px-3 py-1.5 text-xs font-medium text-white hover:bg-red-500 transition disabled:opacity-50"
            >
              Confirm
            </button>
            <button
              type="button"
              onClick={() => setConfirming(null)}
              className="text-xs text-slate-500 hover:text-slate-700 dark:hover:text-slate-300"
            >
              Cancel
            </button>
          </div>
        )}
      </div>
      {/* Progress bar */}
      <div>
        <div className="flex items-center justify-between text-xs text-slate-500 dark:text-slate-400 mb-1">
          <span>
            {progress.completed} / {progress.total} runs
          </span>
          <span>{pct}%</span>
        </div>
        <div className="w-full h-2 rounded-full bg-slate-200 dark:bg-slate-700 overflow-hidden">
          <div
            data-testid="progress-bar-fill"
            className="h-full rounded-full bg-indigo-500 transition-all duration-500"
            style={{ width: `${pct}%` }}
          />
        </div>
      </div>
      {/* Stats */}
      <div className="grid grid-cols-2 gap-3 text-sm">
        <div className="rounded-lg bg-slate-50 dark:bg-slate-800 p-2.5">
          <div className="text-xs text-slate-400 dark:text-slate-500">Tokens</div>
          <div className="font-semibold text-slate-700 dark:text-slate-300 tabular-nums">
            {progress.tokens_total.toLocaleString()}
          </div>
        </div>
        <div className="rounded-lg bg-slate-50 dark:bg-slate-800 p-2.5">
          <div className="text-xs text-slate-400 dark:text-slate-500">Est. Cost</div>
          <div className="font-semibold text-slate-700 dark:text-slate-300 tabular-nums">
            ${progress.cost_total.toFixed(4)}
          </div>
        </div>
        <div className="rounded-lg bg-slate-50 dark:bg-slate-800 p-2.5">
          <div className="text-xs text-slate-400 dark:text-slate-500">Cache Rate</div>
          <div className="font-semibold text-slate-700 dark:text-slate-300 tabular-nums">
            {cacheRate}%
          </div>
        </div>
        <div className="rounded-lg bg-slate-50 dark:bg-slate-800 p-2.5">
          <div className="text-xs text-slate-400 dark:text-slate-500">Status</div>
          <div className="font-semibold text-slate-700 dark:text-slate-300 capitalize">
            {experimentStatus}
          </div>
        </div>
      </div>
    </div>
  );
}
// ---------------------------------------------------------------------------
// Connection Status Indicator
// ---------------------------------------------------------------------------
/**
 * Compact connection badge: a coloured dot followed by a short text label
 * describing the current WebSocket connection state.
 *
 * @param status - Current connection state; selects both the dot colour and
 *   the label ("Live", "Connecting…" or "Disconnected").
 */
function ConnectionIndicator({ status }: { status: ConnectionStatus }) {
  // Tailwind classes for the dot, keyed by connection state.
  const dotClassByStatus: Record<ConnectionStatus, string> = {
    connected: "bg-green-500",
    connecting: "bg-amber-500 animate-pulse",
    disconnected: "bg-red-500",
  };

  // Anything that is neither connected nor connecting reads as disconnected.
  let label = "Disconnected";
  if (status === "connected") {
    label = "Live";
  } else if (status === "connecting") {
    label = "Connecting…";
  }

  const dotClassName = `h-2 w-2 rounded-full ${dotClassByStatus[status]}`;

  return (
    <span className="inline-flex items-center gap-1.5 text-xs text-slate-500 dark:text-slate-400">
      <span data-testid="connection-indicator" className={dotClassName} />
      {label}
    </span>
  );
}
// ---------------------------------------------------------------------------
// Live Page
// ---------------------------------------------------------------------------
/**
 * Live observability page for a single experiment (route param `id`).
 *
 * Responsibilities:
 * - Fetches the experiment and an initial leaderboard snapshot over HTTP.
 * - Once the experiment has loaded, opens a WebSocket, subscribes to the
 *   experiment and folds incoming events into the activity timeline, the
 *   leaderboard and the progress counters.
 * - Reconnects after a close with exponential backoff (1s, 2s, 4s … capped at
 *   30s). The backoff counter is only reset once a message is actually
 *   received on the new connection, so repeated failed attempts keep backing
 *   off instead of retrying every second.
 * - Renders a loading state, an error state with a retry button, or the
 *   60/40 dashboard layout (timeline on the left, controls + leaderboard on
 *   the right).
 */
export default function LivePage() {
  const { id } = useParams<{ id: string }>();
  // NOTE(review): `navigate` is not referenced anywhere in this component;
  // remove it (and the import, if nothing else uses it) once confirmed.
  const navigate = useNavigate();

  // Experiment state
  const [experiment, setExperiment] = useState<ExperimentResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [expStatus, setExpStatus] = useState("idle");

  // WebSocket state
  const [connectionStatus, setConnectionStatus] =
    useState<ConnectionStatus>("disconnected");
  const wsRef = useRef<WsConnection | null>(null);
  const reconnectTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const reconnectAttemptRef = useRef(0);
  // Generation counter: bumped whenever a connection is superseded or torn
  // down, so callbacks belonging to an old connection can recognise that they
  // are stale and do nothing.
  const connectionTokenRef = useRef(0);

  // Timeline
  const [timeline, setTimeline] = useState<TimelineEntry[]>([]);
  const [autoScroll, setAutoScroll] = useState(true);
  const [eventFilter, setEventFilter] = useState<WsEventType | "all">("all");
  const timelineEndRef = useRef<HTMLDivElement | null>(null);

  // Leaderboard
  const [leaderboard, setLeaderboard] = useState<LeaderboardRow[]>([]);
  const [bestRunId, setBestRunId] = useState<string | null>(null);

  // Progress
  const [progress, setProgress] = useState({
    completed: 0,
    total: 0,
    cache_hits: 0,
    tokens_total: 0,
    cost_total: 0,
  });

  // -------------------------------------------------------------------------
  // Load experiment
  // -------------------------------------------------------------------------
  const loadExperiment = useCallback(async () => {
    if (!id) return;
    setLoading(true);
    setError(null);
    try {
      const exp = await experiments.get(id);
      setExperiment(exp);
      setExpStatus(exp.status);
    } catch (err: unknown) {
      if (err instanceof ApiError) {
        setError(`Failed to load experiment (${err.status}).`);
      } else {
        setError("Network error. Is the server running?");
      }
    } finally {
      setLoading(false);
    }
  }, [id]);

  // -------------------------------------------------------------------------
  // Load initial leaderboard
  // -------------------------------------------------------------------------
  const loadLeaderboard = useCallback(async () => {
    if (!id) return;
    try {
      const resp = await runsApi.leaderboard(id);
      // The HTTP snapshot is mapped with empty scores / zero total; richer
      // values arrive later through WebSocket events.
      const rows: LeaderboardRow[] = resp.items.map((r: RunResponse) => ({
        run_id: r.id,
        config_summary: configSummary(r.config),
        scores: {},
        weighted_total: 0,
        status: r.status,
        cached: false,
      }));
      setLeaderboard(rows);
      if (rows.length > 0) {
        // presumably the endpoint returns rows best-first — TODO confirm
        setBestRunId(rows[0].run_id);
      }
    } catch {
      // Non-critical — leaderboard will populate via WebSocket
    }
  }, [id]);

  useEffect(() => {
    loadExperiment();
    loadLeaderboard();
  }, [loadExperiment, loadLeaderboard]);

  // -------------------------------------------------------------------------
  // Process incoming WS events
  // -------------------------------------------------------------------------
  const processEvent = useCallback(
    (raw: unknown) => {
      // Frames that are not objects (null, strings, numbers…) cannot be
      // events; bail out instead of throwing on property access.
      if (typeof raw !== "object" || raw === null) return;
      const evt = raw as WsEvent;
      if (!evt.type || evt.type === "ack") return;

      // Filter events not for this experiment (if the server sends global events)
      if (evt.experiment_id && evt.experiment_id !== id) return;

      // Fall back to the current time when the timestamp is missing or
      // cannot be parsed, so the timeline never shows an invalid date.
      const parsedTime = new Date(evt.timestamp ?? Date.now());
      const now = Number.isNaN(parsedTime.getTime()) ? new Date() : parsedTime;

      // Build timeline entry
      let message = "";
      let detail: string | undefined;
      switch (evt.type) {
        case "run.started":
          message = `Run started: ${configSummary(evt.config)}`;
          break;
        case "run.completed":
          message = `Run completed: ${configSummary(evt.config)}`;
          detail =
            evt.weighted_total != null
              ? `Score: ${evt.weighted_total.toFixed(3)}`
              : undefined;
          break;
        case "new_best_found":
          message = `New best config found!`;
          detail =
            evt.weighted_total != null
              ? `Score: ${evt.weighted_total.toFixed(3)}`
              : undefined;
          break;
        case "cache_hit":
          message = `Cache hit: ${configSummary(evt.config)}`;
          break;
        case "run.failed":
          message = `Run failed: ${evt.error ?? "unknown error"}`;
          break;
        case "sweep.progress":
          message = `Progress: ${evt.progress?.completed ?? 0}/${evt.progress?.total ?? 0} runs`;
          break;
        case "sweep.completed":
          message = "Sweep completed!";
          setExpStatus("completed");
          break;
        default:
          message = evt.type;
      }

      setTimeline((prev) => [
        ...prev,
        {
          id: nextTimelineId(),
          type: evt.type,
          run_id: evt.run_id,
          message,
          detail,
          timestamp: now,
        },
      ]);

      // Update leaderboard: completed / new-best events insert or replace the
      // row for their run.
      if (
        (evt.type === "run.completed" || evt.type === "new_best_found") &&
        evt.run_id
      ) {
        const row: LeaderboardRow = {
          run_id: evt.run_id,
          config_summary: configSummary(evt.config),
          scores: evt.scores ?? {},
          weighted_total: evt.weighted_total ?? 0,
          status: "completed",
          cached: evt.cached ?? false,
        };
        setLeaderboard((prev) => {
          const existing = prev.findIndex((r) => r.run_id === row.run_id);
          if (existing >= 0) {
            const next = [...prev];
            next[existing] = row;
            return next;
          }
          return [...prev, row];
        });
        if (evt.type === "new_best_found") {
          setBestRunId(evt.run_id);
        }
      }

      // Cache hits only add a row when the run is not listed yet; an existing
      // row is left untouched.
      if (evt.type === "cache_hit" && evt.run_id) {
        const row: LeaderboardRow = {
          run_id: evt.run_id,
          config_summary: configSummary(evt.config),
          scores: evt.scores ?? {},
          weighted_total: evt.weighted_total ?? 0,
          status: "completed",
          cached: true,
        };
        setLeaderboard((prev) => {
          const existing = prev.findIndex((r) => r.run_id === row.run_id);
          if (existing >= 0) return prev;
          return [...prev, row];
        });
      }

      // Update progress
      if (evt.progress) {
        setProgress(evt.progress);
      }
    },
    [id],
  );

  // -------------------------------------------------------------------------
  // WebSocket connection with exponential backoff reconnect
  // -------------------------------------------------------------------------
  const connectWs = useCallback(() => {
    // Start a new generation first so that closing the previous socket
    // cannot trigger a reconnect from its (now stale) close handler.
    const token = connectionTokenRef.current + 1;
    connectionTokenRef.current = token;

    if (reconnectTimerRef.current) {
      clearTimeout(reconnectTimerRef.current);
      reconnectTimerRef.current = null;
    }
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }

    setConnectionStatus("connecting");
    const conn = connectWebSocket(
      (data) => {
        if (connectionTokenRef.current !== token) return;
        // Receiving data proves the link is healthy, so only now is it safe
        // to reset the backoff. Resetting at connect time would pin every
        // retry to the minimum delay.
        reconnectAttemptRef.current = 0;
        processEvent(data);
      },
      () => {
        // onClose — ignore closes of superseded or torn-down connections.
        if (connectionTokenRef.current !== token) return;
        setConnectionStatus("disconnected");
        wsRef.current = null;

        // Reconnect with exponential backoff: 1s, 2s, 4s … capped at 30s.
        const attempt = reconnectAttemptRef.current;
        const delay = Math.min(1000 * Math.pow(2, attempt), 30000);
        reconnectAttemptRef.current = attempt + 1;
        reconnectTimerRef.current = setTimeout(() => {
          reconnectTimerRef.current = null;
          connectWs();
        }, delay);
      },
    );
    wsRef.current = conn;
    // No open callback is passed to connectWebSocket here, so the status is
    // set optimistically as soon as the connection object exists.
    setConnectionStatus("connected");

    // Subscribe to experiment events
    if (id) {
      conn.send({ type: "subscribe", experiment_id: id });
    }
  }, [id, processEvent]);

  useEffect(() => {
    if (!id || loading || error) return;
    connectWs();
    return () => {
      // Invalidate the live connection's callbacks before closing it so the
      // teardown neither updates state nor schedules another reconnect.
      connectionTokenRef.current += 1;
      reconnectAttemptRef.current = 0;
      if (reconnectTimerRef.current) {
        clearTimeout(reconnectTimerRef.current);
        reconnectTimerRef.current = null;
      }
      if (wsRef.current) {
        wsRef.current.close();
        wsRef.current = null;
      }
    };
  }, [id, loading, error, connectWs]);

  // -------------------------------------------------------------------------
  // Auto-scroll timeline
  // -------------------------------------------------------------------------
  useEffect(() => {
    if (autoScroll && timelineEndRef.current) {
      // Optional call: scrollIntoView may be missing (e.g. in test DOMs).
      timelineEndRef.current.scrollIntoView?.({ behavior: "smooth" });
    }
  }, [timeline, autoScroll]);

  // -------------------------------------------------------------------------
  // Filter timeline
  // -------------------------------------------------------------------------
  const filteredTimeline =
    eventFilter === "all"
      ? timeline
      : timeline.filter((e) => e.type === eventFilter);

  // -------------------------------------------------------------------------
  // Render
  // -------------------------------------------------------------------------
  if (loading) {
    return (
      <div className="min-h-screen bg-gradient-to-br from-indigo-50 to-slate-100 dark:from-slate-900 dark:to-slate-800 flex items-center justify-center">
        <p className="text-slate-500 dark:text-slate-400 animate-pulse">
          Loading experiment…
        </p>
      </div>
    );
  }

  if (error) {
    return (
      <div className="min-h-screen bg-gradient-to-br from-indigo-50 to-slate-100 dark:from-slate-900 dark:to-slate-800 px-4 py-8">
        <div className="mx-auto max-w-2xl">
          <div
            role="alert"
            className="rounded-xl bg-red-50 dark:bg-red-900/30 border border-red-200 dark:border-red-800 p-6 text-center"
          >
            <p className="text-red-700 dark:text-red-300">{error}</p>
            <button
              type="button"
              onClick={loadExperiment}
              className="mt-3 text-sm font-medium text-indigo-600 dark:text-indigo-400 hover:text-indigo-500"
            >
              Retry
            </button>
          </div>
        </div>
      </div>
    );
  }

  return (
    <div className="min-h-screen bg-gradient-to-br from-indigo-50 to-slate-100 dark:from-slate-900 dark:to-slate-800 px-4 py-6">
      <div className="mx-auto max-w-7xl">
        {/* Header */}
        <div className="mb-6 flex flex-wrap items-center justify-between gap-4">
          <div>
            <div className="flex items-center gap-3">
              <Link
                to={`/experiments/${id}`}
                className="text-sm text-indigo-600 dark:text-indigo-400 hover:text-indigo-500"
              >
                &larr; Experiment
              </Link>
              <ConnectionIndicator status={connectionStatus} />
            </div>
            <h1 className="mt-1 text-2xl font-bold text-slate-900 dark:text-white">
              {experiment?.name ?? "Live Dashboard"}
            </h1>
            {experiment?.description && (
              <p className="mt-0.5 text-sm text-slate-500 dark:text-slate-400">
                {experiment.description}
              </p>
            )}
          </div>
        </div>
        {/* Main layout: 60/40 split */}
        <div className="grid grid-cols-1 lg:grid-cols-5 gap-6">
          {/* Left column — 60% */}
          <div className="lg:col-span-3 space-y-6">
            {/* Activity Timeline */}
            <div className="rounded-xl bg-white dark:bg-slate-800 shadow ring-1 ring-slate-200 dark:ring-slate-700 overflow-hidden">
              <div className="flex items-center justify-between border-b border-slate-200 dark:border-slate-700 px-4 py-3">
                <h2 className="text-lg font-semibold text-slate-900 dark:text-white">
                  Activity Timeline
                </h2>
                <div className="flex items-center gap-2">
                  {/* Filter */}
                  <select
                    data-testid="event-filter"
                    value={eventFilter}
                    onChange={(e) =>
                      setEventFilter(e.target.value as WsEventType | "all")
                    }
                    className="rounded-md border border-slate-300 dark:border-slate-600 bg-white dark:bg-slate-700 px-2 py-1 text-xs text-slate-700 dark:text-slate-300"
                  >
                    <option value="all">All events</option>
                    <option value="run.started">Started</option>
                    <option value="run.completed">Completed</option>
                    <option value="new_best_found">New best</option>
                    <option value="cache_hit">Cache hits</option>
                    <option value="run.failed">Failed</option>
                  </select>
                  {/* Pause scroll */}
                  <button
                    type="button"
                    data-testid="toggle-autoscroll"
                    onClick={() => setAutoScroll((enabled) => !enabled)}
                    className={`rounded-md px-2 py-1 text-xs font-medium transition ${
                      autoScroll
                        ? "bg-indigo-100 dark:bg-indigo-900/40 text-indigo-700 dark:text-indigo-300"
                        : "bg-slate-100 dark:bg-slate-700 text-slate-500 dark:text-slate-400"
                    }`}
                  >
                    {autoScroll ? "Auto-scroll ON" : "Auto-scroll OFF"}
                  </button>
                </div>
              </div>
              <div
                data-testid="timeline-container"
                className="max-h-[500px] overflow-y-auto p-4 space-y-2"
              >
                {filteredTimeline.length === 0 && (
                  <p className="text-center text-sm text-slate-400 dark:text-slate-500 py-8">
                    {timeline.length === 0
                      ? "Waiting for events…"
                      : "No matching events"}
                  </p>
                )}
                {filteredTimeline.map((entry) => (
                  <TimelineCard key={entry.id} entry={entry} />
                ))}
                {/* Sentinel element used as the auto-scroll target */}
                <div ref={timelineEndRef} />
              </div>
            </div>
          </div>
          {/* Right column — 40% */}
          <div className="lg:col-span-2 space-y-6">
            {/* Steering Controls */}
            <div className="rounded-xl bg-white dark:bg-slate-800 shadow ring-1 ring-slate-200 dark:ring-slate-700 p-4">
              <h2 className="text-lg font-semibold text-slate-900 dark:text-white mb-4">
                Controls
              </h2>
              <SteeringControls
                experimentId={id!}
                experimentStatus={expStatus}
                progress={progress}
                onStatusChange={setExpStatus}
              />
            </div>
            {/* Leaderboard */}
            <div className="rounded-xl bg-white dark:bg-slate-800 shadow ring-1 ring-slate-200 dark:ring-slate-700 overflow-hidden">
              <div className="border-b border-slate-200 dark:border-slate-700 px-4 py-3">
                <h2 className="text-lg font-semibold text-slate-900 dark:text-white">
                  Leaderboard
                </h2>
              </div>
              <div className="p-4">
                <LeaderboardTable rows={leaderboard} bestRunId={bestRunId} />
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}