feat: Store LLM request params (max_tokens, model, modality) in pipeline events
- _make_llm_callback now accepts a request_params dict
- All 6 LLM call sites pass max_tokens, model_override, modality, response_model, hard_limit
- request_params is stored in the payload JSONB on every llm_call event (always, not just in debug mode)
- Frontend JSON export includes the full payload, plus request_params at the top level
- DebugPayloadViewer shows a "Request Params" section even with debug mode off
- Answers whether max_tokens is actually being sent on pipeline requests
This commit is contained in:
parent
a673e641b8
commit
d58194ff96
2 changed files with 26 additions and 3 deletions
|
|
@@ -141,11 +141,19 @@ def _make_llm_callback(
|
||||||
user_prompt: str | None = None,
|
user_prompt: str | None = None,
|
||||||
run_id: str | None = None,
|
run_id: str | None = None,
|
||||||
context_label: str | None = None,
|
context_label: str | None = None,
|
||||||
|
request_params: dict | None = None,
|
||||||
):
|
):
|
||||||
"""Create an on_complete callback for LLMClient that emits llm_call events.
|
"""Create an on_complete callback for LLMClient that emits llm_call events.
|
||||||
|
|
||||||
When debug mode is enabled, captures full system prompt, user prompt,
|
When debug mode is enabled, captures full system prompt, user prompt,
|
||||||
and response text on each llm_call event.
|
and response text on each llm_call event.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
request_params:
|
||||||
|
Dict of LLM request parameters (max_tokens, model_override, modality,
|
||||||
|
response_model, temperature, etc.) to store in the event payload for
|
||||||
|
debugging which parameters were actually sent to the API.
|
||||||
"""
|
"""
|
||||||
debug = _is_debug_mode()
|
debug = _is_debug_mode()
|
||||||
|
|
||||||
|
|
@@ -169,6 +177,7 @@ def _make_llm_callback(
|
||||||
"finish_reason": finish_reason,
|
"finish_reason": finish_reason,
|
||||||
"is_fallback": is_fallback,
|
"is_fallback": is_fallback,
|
||||||
**({"context": context_label} if context_label else {}),
|
**({"context": context_label} if context_label else {}),
|
||||||
|
**({"request_params": request_params} if request_params else {}),
|
||||||
},
|
},
|
||||||
system_prompt_text=system_prompt if debug else None,
|
system_prompt_text=system_prompt if debug else None,
|
||||||
user_prompt_text=user_prompt if debug else None,
|
user_prompt_text=user_prompt if debug else None,
|
||||||
|
|
@@ -367,7 +376,8 @@ def stage2_segmentation(self, video_id: str, run_id: str | None = None) -> str:
|
||||||
hard_limit = get_settings().llm_max_tokens_hard_limit
|
hard_limit = get_settings().llm_max_tokens_hard_limit
|
||||||
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit)
|
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage2_segmentation", hard_limit=hard_limit)
|
||||||
logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
|
logger.info("Stage 2 using model=%s, modality=%s, max_tokens=%d", model_override or "default", modality, max_tokens)
|
||||||
raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id),
|
_s2_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SegmentationResult", "hard_limit": hard_limit}
|
||||||
|
raw = llm.complete(system_prompt, user_prompt, response_model=SegmentationResult, on_complete=_make_llm_callback(video_id, "stage2_segmentation", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, request_params=_s2_request_params),
|
||||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||||
result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt,
|
result = _safe_parse_llm_response(raw, SegmentationResult, llm, system_prompt, user_prompt,
|
||||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||||
|
|
@@ -459,7 +469,8 @@ def stage3_extraction(self, video_id: str, run_id: str | None = None) -> str:
|
||||||
)
|
)
|
||||||
|
|
||||||
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit)
|
max_tokens = estimate_max_tokens(system_prompt, user_prompt, stage="stage3_extraction", hard_limit=hard_limit)
|
||||||
raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label),
|
_s3_request_params = {"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ExtractionResult", "hard_limit": hard_limit}
|
||||||
|
raw = llm.complete(system_prompt, user_prompt, response_model=ExtractionResult, on_complete=_make_llm_callback(video_id, "stage3_extraction", system_prompt=system_prompt, user_prompt=user_prompt, run_id=run_id, context_label=topic_label, request_params=_s3_request_params),
|
||||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||||
result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt,
|
result = _safe_parse_llm_response(raw, ExtractionResult, llm, system_prompt, user_prompt,
|
||||||
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
modality=modality, model_override=model_override, max_tokens=max_tokens)
|
||||||
|
|
@@ -556,6 +567,7 @@ def _classify_moment_batch(
|
||||||
video_id, "stage4_classification",
|
video_id, "stage4_classification",
|
||||||
system_prompt=system_prompt, user_prompt=user_prompt,
|
system_prompt=system_prompt, user_prompt=user_prompt,
|
||||||
run_id=run_id, context_label=batch_label,
|
run_id=run_id, context_label=batch_label,
|
||||||
|
request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "ClassificationResult", "hard_limit": hard_limit},
|
||||||
),
|
),
|
||||||
modality=modality, model_override=model_override,
|
modality=modality, model_override=model_override,
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
|
|
@@ -865,6 +877,7 @@ def _synthesize_chunk(
|
||||||
video_id, "stage5_synthesis",
|
video_id, "stage5_synthesis",
|
||||||
system_prompt=system_prompt, user_prompt=user_prompt,
|
system_prompt=system_prompt, user_prompt=user_prompt,
|
||||||
run_id=run_id, context_label=chunk_label,
|
run_id=run_id, context_label=chunk_label,
|
||||||
|
request_params={"max_tokens": estimated_input, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
|
||||||
),
|
),
|
||||||
modality=modality, model_override=model_override, max_tokens=estimated_input,
|
modality=modality, model_override=model_override, max_tokens=estimated_input,
|
||||||
)
|
)
|
||||||
|
|
@@ -946,6 +959,7 @@ def _merge_pages_by_slug(
|
||||||
system_prompt=merge_system_prompt,
|
system_prompt=merge_system_prompt,
|
||||||
user_prompt=merge_user_prompt,
|
user_prompt=merge_user_prompt,
|
||||||
run_id=run_id, context_label=f"merge:{slug}",
|
run_id=run_id, context_label=f"merge:{slug}",
|
||||||
|
request_params={"max_tokens": max_tokens, "model_override": model_override, "modality": modality, "response_model": "SynthesisResult", "hard_limit": hard_limit},
|
||||||
),
|
),
|
||||||
modality=modality, model_override=model_override,
|
modality=modality, model_override=model_override,
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
|
|
|
||||||
|
|
@@ -131,6 +131,7 @@ interface DebugSection {
|
||||||
|
|
||||||
function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
|
function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
|
||||||
const sections: DebugSection[] = [];
|
const sections: DebugSection[] = [];
|
||||||
|
if (event.payload?.request_params) sections.push({ label: "Request Params", content: JSON.stringify(event.payload.request_params, null, 2) });
|
||||||
if (event.system_prompt_text) sections.push({ label: "System Prompt", content: event.system_prompt_text });
|
if (event.system_prompt_text) sections.push({ label: "System Prompt", content: event.system_prompt_text });
|
||||||
if (event.user_prompt_text) sections.push({ label: "User Prompt", content: event.user_prompt_text });
|
if (event.user_prompt_text) sections.push({ label: "User Prompt", content: event.user_prompt_text });
|
||||||
if (event.response_text) sections.push({ label: "Response", content: event.response_text });
|
if (event.response_text) sections.push({ label: "Response", content: event.response_text });
|
||||||
|
|
@@ -171,11 +172,19 @@ function DebugPayloadViewer({ event }: { event: PipelineEvent }) {
|
||||||
};
|
};
|
||||||
|
|
||||||
const exportAsJson = () => {
|
const exportAsJson = () => {
|
||||||
const data: Record<string, string | null> = {
|
// Dump everything — full rawdog debug payload with request params
|
||||||
|
const data: Record<string, unknown> = {
|
||||||
event_id: event.id,
|
event_id: event.id,
|
||||||
stage: event.stage,
|
stage: event.stage,
|
||||||
event_type: event.event_type,
|
event_type: event.event_type,
|
||||||
model: event.model,
|
model: event.model,
|
||||||
|
prompt_tokens: event.prompt_tokens,
|
||||||
|
completion_tokens: event.completion_tokens,
|
||||||
|
total_tokens: event.total_tokens,
|
||||||
|
duration_ms: event.duration_ms,
|
||||||
|
created_at: event.created_at,
|
||||||
|
payload: event.payload,
|
||||||
|
request_params: event.payload?.request_params ?? null,
|
||||||
system_prompt_text: event.system_prompt_text,
|
system_prompt_text: event.system_prompt_text,
|
||||||
user_prompt_text: event.user_prompt_text,
|
user_prompt_text: event.user_prompt_text,
|
||||||
response_text: event.response_text,
|
response_text: event.response_text,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue