feat: Store LLM request params (max_tokens, model, modality) in pipeline events
- _make_llm_callback now accepts a request_params dict
- All 6 LLM call sites pass max_tokens, model_override, modality, response_model, and hard_limit
- request_params is stored in the payload JSONB on every llm_call event (always, not just in debug mode)
- Frontend JSON export includes the full payload plus request_params at the top level
- DebugPayloadViewer shows a 'Request Params' section even with debug mode off
- Answers whether max_tokens is actually being sent on pipeline requests
This commit is contained in:
parent
a673e641b8
commit
d58194ff96
2 changed files with 26 additions and 3 deletions
|
|
@ -141,11 +141,19 @@ def _make_llm_callback(
|
|||
user_prompt: str | None = None,
|
||||
run_id: str | None = None,
|
||||
context_label: str | None = None,
|
||||
request_params: dict | None = None,
|
||||
):
|
||||
"""Create an on_complete callback for LLMClient that emits llm_call events.
|
||||
|
||||
When debug mode is enabled, captures full system prompt, user prompt,
|
||||
and response text on each llm_call event.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
request_params:
|
||||
Dict of LLM request parameters (max_tokens, model_override, modality,
|
||||
response_model, temperature, etc.) to store in the event payload for
|
||||
debugging which parameters were actually sent to the API.
|
||||
"""
|
||||
debug = _is_debug_mode()
|
||||
|
||||
|
|
@ -169,6 +177,7 @@ def _make_llm_callback(
|
|||
"finish_reason": finish_reason,
|
||||
"is_fallback": is_fallback,
|
||||
**({"context": context_label} if context_label else {}),
|
||||
**({"request_params": request_params} if request_params else {}),
|
||||
},
|
||||
system_prompt_text=system_prompt if debug else None,
|
||||
user_prompt_text=user_prompt if debug else None,
|
||||
|
|
@ -367,7 +376,8 @@ def stage2_segmentation(self, video_id: str, run_id: str | None = None) -> str:
|
|||
hard_limit = get_settings().llm_max_tokens_hard_limit
|
||||
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit)
|
||||
logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
|
||||
raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
|
||||
_s2_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SegmentationResult", "hard_limit": hard_limit}
|
||||
raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, request_params=_s2_request_params),
|
||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||
result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt,
|
||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||
|
|
@ -459,7 +469,8 @@ def stage3_extraction(self, video_id: str, run_id: str | None = None) -> str:
|
|||
)
|
||||
|
||||
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit)
|
||||
raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label),
|
||||
_s3_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ExtractionResult", "hard_limit": hard_limit}
|
||||
raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label, request_params=_s3_request_params),
|
||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||
result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt,
|
||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||
|
|
@ -556,6 +567,7 @@ def _classify_moment_batch(
|
|||
video_id, "stage4_classification",
|
||||
system_prompt=system_prompt, user_prompt=user_prompt,
|
||||
run_id=run_id, context_label=batch_label,
|
||||
request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ClassificationResult", "hard_limit": hard_limit},
|
||||
),
|
||||
modality=modality, model_override=model_override,
|
||||
max_tokens=max_tokens,
|
||||
|
|
@ -865,6 +877,7 @@ def _synthesize_chunk(
|
|||
video_id, "stage5_synthesis",
|
||||
system_prompt=system_prompt, user_prompt=user_prompt,
|
||||
run_id=run_id, context_label=chunk_label,
|
||||
request_params={"max_tokens": estimated_input, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
|
||||
),
|
||||
modality=modality, model_override=model_override, max_tokens=estimated_input,
|
||||
)
|
||||
|
|
@ -946,6 +959,7 @@ def _merge_pages_by_slug(
|
|||
system_prompt=merge_system_prompt,
|
||||
user_prompt=merge_user_prompt,
|
||||
run_id=run_id, context_label=f"merge:{slug}",
|
||||
request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
|
||||
),
|
||||
modality=modality, model_override=model_override,
|
||||
max_tokens=max_tokens,
|
||||
|
|
|
|||
|
|
@ -131,6 +131,7 @@ interface DebugSection {
|
|||
|
||||
function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
|
||||
const sections: DebugSection[] = [];
|
||||
if (event.payload?.request_params) sections.push({ label: "Request Params", content: JSON.stringify(event.payload.request_params, null, 2) });
|
||||
if (event.system_prompt_text) sections.push({ label: "System Prompt", content: event.system_prompt_text });
|
||||
if (event.user_prompt_text) sections.push({ label: "User Prompt", content: event.user_prompt_text });
|
||||
if (event.response_text) sections.push({ label: "Response", content: event.response_text });
|
||||
|
|
@ -171,11 +172,19 @@ function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
|
|||
};
|
||||
|
||||
const exportAsJson = () => {
|
||||
const data: Record<string, string | null> = {
|
||||
// Dump everything — full rawdog debug payload with request params
|
||||
const data: Record<string, unknown> = {
|
||||
event_id: event.id,
|
||||
stage: event.stage,
|
||||
event_type: event.event_type,
|
||||
model: event.model,
|
||||
prompt_tokens: event.prompt_tokens,
|
||||
completion_tokens: event.completion_tokens,
|
||||
total_tokens: event.total_tokens,
|
||||
duration_ms: event.duration_ms,
|
||||
created_at: event.created_at,
|
||||
payload: event.payload,
|
||||
request_params: event.payload?.request_params ?? null,
|
||||
system_prompt_text: event.system_prompt_text,
|
||||
user_prompt_text: event.user_prompt_text,
|
||||
response_text: event.response_text,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue