diff --git a/backend/pipeline/llm_client.py b/backend/pipeline/llm_client.py
index 3ecf1df..f7d23f0 100644
--- a/backend/pipeline/llm_client.py
+++ b/backend/pipeline/llm_client.py
@@ -140,6 +140,13 @@ class LLMClient:
            **kwargs,
        )
        raw = response.choices[0].message.content or ""
+        usage = getattr(response, "usage", None)
+        if usage:
+            logger.info(
+                "LLM response: prompt_tokens=%s, completion_tokens=%s, total=%s, content_len=%d, finish=%s",
+                usage.prompt_tokens, usage.completion_tokens, usage.total_tokens,
+                len(raw), response.choices[0].finish_reason,
+            )
        if modality == "thinking":
            raw = strip_think_tags(raw)
        return raw
@@ -161,6 +168,13 @@ class LLMClient:
            **kwargs,
        )
        raw = response.choices[0].message.content or ""
+        usage = getattr(response, "usage", None)
+        if usage:
+            logger.info(
+                "LLM response (fallback): prompt_tokens=%s, completion_tokens=%s, total=%s, content_len=%d, finish=%s",
+                usage.prompt_tokens, usage.completion_tokens, usage.total_tokens,
+                len(raw), response.choices[0].finish_reason,
+            )
        if modality == "thinking":
            raw = strip_think_tags(raw)
        return raw