feat: Store LLM request params (max_tokens, model, modality) in pipeline events

- _make_llm_callback now accepts request_params dict
- All 6 LLM call sites pass max_tokens, model_override, modality, response_model, hard_limit
- request_params stored in payload JSONB on every llm_call event (always, not just debug mode)
- Frontend JSON export includes full payload + request_params at top level
- DebugPayloadViewer shows 'Request Params' section even with debug mode off
- Answers whether max_tokens is actually being sent on pipeline requests
jlightner 2026-04-01 07:01:57 +00:00
parent a673e641b8
commit d58194ff96
2 changed files with 26 additions and 3 deletions
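
For reference, an llm_call event payload after this change looks roughly like the following. This is a sketch assembled from the callback diff below; the concrete values are illustrative, not taken from a real run:

```python
# Illustrative shape of an llm_call event payload with request_params attached.
payload = {
    "finish_reason": "stop",      # illustrative value
    "is_fallback": False,         # illustrative value
    "context": "batch 3/7",       # only present when a context_label was passed
    "request_params": {           # only present when request_params was passed
        "max_tokens": 8192,       # illustrative value
        "model_override": None,
        "modality": "text",
        "response_model": "SegmentationResult",
        "hard_limit": 16384,      # illustrative value
    },
}
```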


@@ -141,11 +141,19 @@ def _make_llm_callback(
     user_prompt: str | None = None,
     run_id: str | None = None,
     context_label: str | None = None,
+    request_params: dict | None = None,
 ):
     """Create an on_complete callback for LLMClient that emits llm_call events.
 
     When debug mode is enabled, captures full system prompt, user prompt,
     and response text on each llm_call event.
 
+    Parameters
+    ----------
+    request_params:
+        Dict of LLM request parameters (max_tokens, model_override, modality,
+        response_model, temperature, etc.) to store in the event payload for
+        debugging which parameters were actually sent to the API.
     """
 
     debug = _is_debug_mode()
@@ -169,6 +177,7 @@ def _make_llm_callback(
                 "finish_reason": finish_reason,
                 "is_fallback": is_fallback,
                 **({"context": context_label} if context_label else {}),
+                **({"request_params": request_params} if request_params else {}),
             },
             system_prompt_text=system_prompt if debug else None,
             user_prompt_text=user_prompt if debug else None,
@@ -367,7 +376,8 @@ def stage2_segmentation(self, video_id: str, run_id: str | None = None) -> str:
         hard_limit = get_settings().llm_max_tokens_hard_limit
         max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit)
         logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
-        raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
+        _s2_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SegmentationResult", "hard_limit": hard_limit}
+        raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, request_params=_s2_request_params),
                            modality=modality, model_override=model_override, max_tokens=max_tokens)
         result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt,
                                           modality=modality, model_override=model_override, max_tokens=max_tokens)
@@ -459,7 +469,8 @@ def stage3_extraction(self, video_id: str, run_id: str | None = None) -> str:
         )
         max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit)
-        raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label),
+        _s3_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ExtractionResult", "hard_limit": hard_limit}
+        raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label, request_params=_s3_request_params),
                            modality=modality, model_override=model_override, max_tokens=max_tokens)
         result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt,
                                           modality=modality, model_override=model_override, max_tokens=max_tokens)
@@ -556,6 +567,7 @@ def _classify_moment_batch(
                 video_id, "stage4_classification",
                 system_prompt=system_prompt, user_prompt=user_prompt,
                 run_id=run_id, context_label=batch_label,
+                request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ClassificationResult", "hard_limit": hard_limit},
             ),
             modality=modality, model_override=model_override,
             max_tokens=max_tokens,
@@ -865,6 +877,7 @@ def _synthesize_chunk(
                 video_id, "stage5_synthesis",
                 system_prompt=system_prompt, user_prompt=user_prompt,
                 run_id=run_id, context_label=chunk_label,
+                request_params={"max_tokens": estimated_input, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
             ),
             modality=modality, model_override=model_override, max_tokens=estimated_input,
         )
@@ -946,6 +959,7 @@ def _merge_pages_by_slug(
                 system_prompt=merge_system_prompt,
                 user_prompt=merge_user_prompt,
                 run_id=run_id, context_label=f"merge:{slug}",
+                request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
             ),
             modality=modality, model_override=model_override,
             max_tokens=max_tokens,
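
With the parameters persisted, the question in the commit message can be checked straight from the database. A minimal sketch using psycopg 3, assuming the events live in a table named pipeline_events reachable at the DSN shown (both are guesses; event_type, stage, payload, and created_at match the fields used above):

```python
import psycopg  # psycopg 3

# Hypothetical DSN and table name -- adjust to the actual schema.
with psycopg.connect("postgresql://localhost/pipeline") as conn:
    rows = conn.execute(
        """
        SELECT stage, payload -> 'request_params' AS request_params
        FROM pipeline_events
        WHERE event_type = 'llm_call'
        ORDER BY created_at DESC
        LIMIT 10
        """
    ).fetchall()

for stage, request_params in rows:
    # request_params is None for events written before this change.
    max_tokens = (request_params or {}).get("max_tokens")
    print(f"{stage}: max_tokens={max_tokens}")
```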


@@ -131,6 +131,7 @@ interface DebugSection {
 function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
   const sections: DebugSection[] = [];
+  if (event.payload?.request_params) sections.push({ label: "Request Params", content: JSON.stringify(event.payload.request_params, null, 2) });
   if (event.system_prompt_text) sections.push({ label: "System Prompt", content: event.system_prompt_text });
   if (event.user_prompt_text) sections.push({ label: "User Prompt", content: event.user_prompt_text });
   if (event.response_text) sections.push({ label: "Response", content: event.response_text });
@@ -171,11 +172,19 @@ function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
   };
 
   const exportAsJson = () => {
-    const data: Record<string, string | null> = {
+    // Dump everything — full rawdog debug payload with request params
+    const data: Record<string, unknown> = {
       event_id: event.id,
       stage: event.stage,
       event_type: event.event_type,
       model: event.model,
+      prompt_tokens: event.prompt_tokens,
+      completion_tokens: event.completion_tokens,
+      total_tokens: event.total_tokens,
+      duration_ms: event.duration_ms,
+      created_at: event.created_at,
+      payload: event.payload,
+      request_params: event.payload?.request_params ?? null,
       system_prompt_text: event.system_prompt_text,
       user_prompt_text: event.user_prompt_text,
       response_text: event.response_text,