Improve live token extraction: warn on missing usage_metadata, capture thinking tokens

- Add WARNING log when usage_metadata/usage is None so zero-cost events
  are visible in logs instead of silently disappearing
- Capture thoughts_token_count from Gemini thinking models into the reasoning field
  (already included in candidates_token_count for billing, now also tracked separately)
- Add the same warning when an OpenAI response is missing its usage object

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-04-24 19:13:39 +01:00
parent d0ad8e67be
commit ad619d45fc

View file

@ -188,19 +188,25 @@ class LLMService:
@staticmethod
def _extract_usage_metadata(response, provider: str) -> dict:
"""Extract token counts from a provider response. All fields default to 0."""
_log = logging.getLogger(__name__)
if provider == 'gemini':
um = getattr(response, 'usage_metadata', None)
if um is None:
_log.warning("Gemini response missing usage_metadata — token counts will be 0, cost recorded as $0")
return {'prompt': 0, 'completion': 0, 'cached': 0, 'reasoning': 0}
# thoughts_token_count (thinking models) is already included in candidates_token_count.
# Capture it separately so the stored event can show the split.
thoughts = getattr(um, 'thoughts_token_count', 0) or 0
return {
'prompt': getattr(um, 'prompt_token_count', 0) or 0,
'completion': getattr(um, 'candidates_token_count', 0) or 0,
'cached': getattr(um, 'cached_content_token_count', 0) or 0,
'reasoning': 0,
'reasoning': thoughts,
}
elif provider == 'openai':
usage = getattr(response, 'usage', None)
if usage is None:
_log.warning("OpenAI response missing usage — token counts will be 0, cost recorded as $0")
return {'prompt': 0, 'completion': 0, 'cached': 0, 'reasoning': 0}
# Responses API (gpt-5.4-2026-03-05)
if hasattr(usage, 'input_tokens'):