9 changes: 9 additions & 0 deletions litellm/types/utils.py
@@ -1556,6 +1556,15 @@ def __init__( # noqa: PLR0915
):
self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]

## OPENAI MAPPING - populate _cache_read_input_tokens from prompt_tokens_details.cached_tokens ##
if (
self._cache_read_input_tokens == 0
and _prompt_tokens_details is not None
and _prompt_tokens_details.cached_tokens is not None
and _prompt_tokens_details.cached_tokens > 0
):
self._cache_read_input_tokens = _prompt_tokens_details.cached_tokens
Comment on lines 1559 to 1565

Cache tokens lost when None

Usage.__init__ only sets _cache_read_input_tokens from prompt_tokens_details.cached_tokens when it’s > 0 (litellm/types/utils.py:1559-1566). If OpenAI returns a valid cached_tokens value of 0 (or the UI expects to reflect that the field was present), this mapping won’t run and you can’t distinguish “absent” vs “present but zero”. Since the PR intent is to map OpenAI’s field, consider setting _cache_read_input_tokens when cached_tokens is not None (and still keep the “don’t overwrite provider-set value” guard).
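
A minimal sketch of that suggestion, assuming the surrounding __init__ and the local _prompt_tokens_details variable stay as in the diff above (this would replace the > 0 check rather than add to it):

## OPENAI MAPPING (sketch of the suggested change) - map whenever cached_tokens is present ##
if (
    self._cache_read_input_tokens == 0  # don't overwrite a provider-set value
    and _prompt_tokens_details is not None
    and _prompt_tokens_details.cached_tokens is not None
):
    self._cache_read_input_tokens = _prompt_tokens_details.cached_tokens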


for k, v in params.items():
setattr(self, k, v)

92 changes: 92 additions & 0 deletions tests/test_litellm/types/test_types_utils.py
@@ -127,3 +127,95 @@ def test_usage_completion_tokens_details_text_tokens():
# Verify round-trip serialization works
new_usage = Usage(**dump_result)
assert new_usage.completion_tokens_details.text_tokens == 12


def test_usage_openai_cached_tokens_populates_cache_read_input_tokens():
"""
Test that OpenAI's prompt_tokens_details.cached_tokens populates
_cache_read_input_tokens. This is the fix for GH issue #19684.

OpenAI returns cached tokens in prompt_tokens_details.cached_tokens,
but _cache_read_input_tokens was not being set, causing the UI to
show "Cache Read Tokens: 0".
"""
from litellm.types.utils import Usage

# Simulate OpenAI response usage (exactly what comes from response.model_dump())
openai_usage = {
"prompt_tokens": 2829,
"completion_tokens": 29,
"total_tokens": 2858,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0,
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 2816,
},
}

usage = Usage(**openai_usage)

# _cache_read_input_tokens should be populated from prompt_tokens_details.cached_tokens
assert usage._cache_read_input_tokens == 2816
# prompt_tokens_details.cached_tokens should also be preserved
assert usage.prompt_tokens_details.cached_tokens == 2816


def test_usage_openai_cached_tokens_zero_does_not_set_cache_read():
"""
When OpenAI returns cached_tokens=0, _cache_read_input_tokens should stay 0.
"""
from litellm.types.utils import Usage

openai_usage = {
"prompt_tokens": 100,
"completion_tokens": 10,
"total_tokens": 110,
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0,
},
}

usage = Usage(**openai_usage)
assert usage._cache_read_input_tokens == 0
Comment on lines +168 to +185

Misleading zero-value test

The implementation maps the internal cache-read counter from prompt_tokens_details.cached_tokens whenever that field is present (checked via cached_tokens is not None), even if the value is 0 (litellm/types/utils.py:1559-1565). This test’s name/docstring reads like zero should be treated as “do not map”, which contradicts the actual behavior and the broader goal of treating “present but zero” differently from “missing”. Please update the test name/docstring and/or assertions to reflect the intended contract: mapping occurs when the field is present; lack of mapping should be reserved for missing details or null cached_tokens.
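
A hedged sketch of how that test could read instead (the test name is hypothetical; the assertions assume the "map when present" contract described above):

def test_usage_openai_cached_tokens_zero_still_mapped():
    """
    Sketch of the suggested contract: mapping runs whenever
    prompt_tokens_details.cached_tokens is present, even when it is 0;
    skipping the mapping is reserved for missing details or a null value.
    """
    from litellm.types.utils import Usage

    # Present but zero: the field is consumed, so the counter is 0 by value, not by omission.
    usage = Usage(
        prompt_tokens=100,
        completion_tokens=10,
        total_tokens=110,
        prompt_tokens_details={"audio_tokens": 0, "cached_tokens": 0},
    )
    assert usage._cache_read_input_tokens == 0
    assert usage.prompt_tokens_details.cached_tokens == 0

    # Missing details entirely: no mapping, the counter keeps its default of 0.
    usage_missing = Usage(prompt_tokens=100, completion_tokens=10, total_tokens=110)
    assert usage_missing._cache_read_input_tokens == 0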



def test_usage_anthropic_cache_read_not_overwritten_by_prompt_details():
"""
When Anthropic explicitly passes cache_read_input_tokens, the OpenAI
fallback mapping should NOT overwrite it.
"""
from litellm.types.utils import Usage

# Anthropic passes cache_read_input_tokens explicitly in **params
usage = Usage(
prompt_tokens=1000,
completion_tokens=50,
total_tokens=1050,
prompt_tokens_details={"cached_tokens": 500},
cache_read_input_tokens=500,
)

# Should use the explicit Anthropic value, not overwrite it
assert usage._cache_read_input_tokens == 500
assert usage.prompt_tokens_details.cached_tokens == 500

Non-overwrite test is weak

In test_usage_anthropic_cache_read_not_overwritten_by_prompt_details the two inputs that could conflict are identical, so the test can’t detect an overwrite regression (tests/test_litellm/types/test_types_utils.py:200-206). Change the test to pass different values for the two fields and assert the explicit cache-read value is preserved.
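
A sketch of that strengthened variant (hypothetical test name; the 700/500 split is arbitrary, chosen only so an overwrite would be visible):

def test_usage_explicit_cache_read_not_overwritten_when_values_differ():
    """
    The explicit cache_read_input_tokens (700) differs from
    prompt_tokens_details.cached_tokens (500), so a regression that
    overwrites the explicit value would now fail the assertion.
    """
    from litellm.types.utils import Usage

    usage = Usage(
        prompt_tokens=1000,
        completion_tokens=50,
        total_tokens=1050,
        prompt_tokens_details={"cached_tokens": 500},
        cache_read_input_tokens=700,
    )

    # The provider-set value must survive; the OpenAI fallback must not clobber it.
    assert usage._cache_read_input_tokens == 700
    assert usage.prompt_tokens_details.cached_tokens == 500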



def test_usage_no_prompt_tokens_details_no_error():
"""
When there's no prompt_tokens_details at all, nothing should break.
"""
from litellm.types.utils import Usage

usage = Usage(
prompt_tokens=100,
completion_tokens=10,
total_tokens=110,
)

assert usage._cache_read_input_tokens == 0