diff --git a/backend/pipeline/stages.py b/backend/pipeline/stages.py
index 740c179..4a4a788 100644
--- a/backend/pipeline/stages.py
+++ b/backend/pipeline/stages.py
@@ -141,11 +141,19 @@ def _make_llm_callback(
     user_prompt: str | None = None,
     run_id: str | None = None,
     context_label: str | None = None,
+    request_params: dict | None = None,
 ):
     """Create an on_complete callback for LLMClient that emits llm_call events.
 
     When debug mode is enabled, captures full system prompt, user prompt, and
     response text on each llm_call event.
+
+    Parameters
+    ----------
+    request_params:
+        Dict of LLM request parameters (max_tokens, model_override, modality,
+        response_model, temperature, etc.) to store in the event payload for
+        debugging which parameters were actually sent to the API.
     """
     debug = _is_debug_mode()
 
@@ -169,6 +177,7 @@ def _make_llm_callback(
             "finish_reason": finish_reason,
             "is_fallback": is_fallback,
             **({"context": context_label} if context_label else {}),
+            **({"request_params": request_params} if request_params else {}),
         },
         system_prompt_text=system_prompt if debug else None,
         user_prompt_text=user_prompt if debug else None,
@@ -367,7 +376,8 @@ def stage2_segmentation(self, video_id: str, run_id: str | None = None) -> str:
     hard_limit = get_settings().llm_max_tokens_hard_limit
     max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit)
     logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
-    raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
+    _s2_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SegmentationResult", "hard_limit": hard_limit}
+    raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, request_params=_s2_request_params),
         modality=modality, model_override=model_override, max_tokens=max_tokens)
     result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt, modality=modality, model_override=model_override, max_tokens=max_tokens)
 
@@ -459,7 +469,8 @@ def stage3_extraction(self, video_id: str, run_id: str | None = None) -> str:
     )
 
     max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit)
-    raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label),
+    _s3_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ExtractionResult", "hard_limit": hard_limit}
+    raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label, request_params=_s3_request_params),
         modality=modality, model_override=model_override, max_tokens=max_tokens)
     result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt, modality=modality, model_override=model_override, max_tokens=max_tokens)
 
@@ -556,6 +567,7 @@ def _classify_moment_batch(
             video_id, "stage4_classification",
             system_prompt=system_prompt, user_prompt=user_prompt,
             run_id=run_id, context_label=batch_label,
+            request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ClassificationResult", "hard_limit": hard_limit},
         ),
         modality=modality, model_override=model_override, max_tokens=max_tokens,
     )
@@ -865,6 +877,7 @@ def _synthesize_chunk(
             video_id, "stage5_synthesis",
             system_prompt=system_prompt, user_prompt=user_prompt,
             run_id=run_id, context_label=chunk_label,
+            request_params={"max_tokens": estimated_input, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
         ),
         modality=modality, model_override=model_override, max_tokens=estimated_input,
     )
@@ -946,6 +959,7 @@ def _merge_pages_by_slug(
                 system_prompt=merge_system_prompt, user_prompt=merge_user_prompt,
                 run_id=run_id, context_label=f"merge:{slug}",
+                request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
             ),
             modality=modality, model_override=model_override, max_tokens=max_tokens,
         )
 
diff --git a/frontend/src/pages/AdminPipeline.tsx b/frontend/src/pages/AdminPipeline.tsx
index 1c4f1e2..bb16be3 100644
--- a/frontend/src/pages/AdminPipeline.tsx
+++ b/frontend/src/pages/AdminPipeline.tsx
@@ -131,6 +131,7 @@ interface DebugSection {
 
 function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
   const sections: DebugSection[] = [];
+  if (event.payload?.request_params) sections.push({ label: "Request Params", content: JSON.stringify(event.payload.request_params, null, 2) });
   if (event.system_prompt_text) sections.push({ label: "System Prompt", content: event.system_prompt_text });
   if (event.user_prompt_text) sections.push({ label: "User Prompt", content: event.user_prompt_text });
   if (event.response_text) sections.push({ label: "Response", content: event.response_text });
@@ -171,11 +172,19 @@ function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
   };
 
   const exportAsJson = () => {
-    const data: Record<string, string> = {
+    // Export the full debug payload, including the request params.
+    const data: Record<string, unknown> = {
       event_id: event.id,
       stage: event.stage,
       event_type: event.event_type,
       model: event.model,
+      prompt_tokens: event.prompt_tokens,
+      completion_tokens: event.completion_tokens,
+      total_tokens: event.total_tokens,
+      duration_ms: event.duration_ms,
+      created_at: event.created_at,
+      payload: event.payload,
+      request_params: event.payload?.request_params ?? null,
       system_prompt_text: event.system_prompt_text,
       user_prompt_text: event.user_prompt_text,
       response_text: event.response_text,
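
For reference, a minimal sketch of what an `llm_call` event payload looks like after this change. The keys are the ones `_make_llm_callback` emits above; every value below is a hypothetical example, not output from a real run:

```python
# Minimal sketch of an llm_call event payload after this change.
# Keys match what _make_llm_callback emits; values are hypothetical.
example_payload = {
    "finish_reason": "stop",
    "is_fallback": False,
    # Present only when a context_label was passed (batch, chunk, or merge label).
    "context": "merge:getting-started",
    # Present only when request_params was passed; mirrors the dict built at
    # the call site, so it records exactly what was sent to the API.
    "request_params": {
        "max_tokens": 4096,        # result of estimate_max_tokens(...)
        "model_override": None,
        "modality": "text",
        "response_model": "SynthesisResult",
        "hard_limit": 8192,        # llm_max_tokens_hard_limit setting
    },
}
```

Note that each call site stores `response_model` as a class name string rather than the class itself, which keeps the payload JSON-serializable for the event store and for the frontend `exportAsJson` path.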