Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions litellm/types/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,14 @@ def __init__( # noqa: PLR0915
):
self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]

## OPENAI MAPPING - populate _cache_read_input_tokens from prompt_tokens_details.cached_tokens ##
if (
self._cache_read_input_tokens == 0
and _prompt_tokens_details is not None
and _prompt_tokens_details.cached_tokens is not None
):
self._cache_read_input_tokens = _prompt_tokens_details.cached_tokens

for k, v in params.items():
setattr(self, k, v)

Expand Down
160 changes: 160 additions & 0 deletions tests/test_litellm/types/test_types_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,163 @@ def test_usage_completion_tokens_details_text_tokens():
# Verify round-trip serialization works
new_usage = Usage(**dump_result)
assert new_usage.completion_tokens_details.text_tokens == 12


def test_usage_openai_cached_tokens_populates_cache_read_input_tokens():
    """
    OpenAI reports cached tokens under prompt_tokens_details.cached_tokens;
    the Usage constructor must copy that value into _cache_read_input_tokens
    (fix for GH issue #19684, where the UI showed "Cache Read Tokens: 0").
    """
    from litellm.types.utils import Usage

    # Build Usage with the same shape response.model_dump() produces for
    # an OpenAI response, passed as explicit keyword arguments.
    usage = Usage(
        prompt_tokens=2829,
        completion_tokens=29,
        total_tokens=2858,
        completion_tokens_details={
            "accepted_prediction_tokens": 0,
            "audio_tokens": 0,
            "reasoning_tokens": 0,
            "rejected_prediction_tokens": 0,
        },
        prompt_tokens_details={
            "audio_tokens": 0,
            "cached_tokens": 2816,
        },
    )

    # The fallback mapping must have copied cached_tokens into the counter.
    assert usage._cache_read_input_tokens == 2816
    # The original details value is preserved untouched.
    assert usage.prompt_tokens_details.cached_tokens == 2816


def test_usage_openai_cached_tokens_zero_does_not_set_cache_read():
    """
    When prompt_tokens_details is present with cached_tokens=0, the
    resulting _cache_read_input_tokens is 0.

    NOTE(review): the OpenAI fallback mapping triggers on *presence*
    (``cached_tokens is not None``), not on a non-zero value — "present
    but zero" still maps, it just writes 0 over an already-zero counter,
    so there is no observable change. Only a missing details object or an
    explicit cached_tokens=None skips the mapping entirely. The function
    name is kept for backward compatibility with existing test selection.
    """
    from litellm.types.utils import Usage

    openai_usage = {
        "prompt_tokens": 100,
        "completion_tokens": 10,
        "total_tokens": 110,
        "prompt_tokens_details": {
            "audio_tokens": 0,
            "cached_tokens": 0,
        },
    }

    usage = Usage(**openai_usage)
    # Mapping 0 -> 0 is a no-op; the counter ends at 0 either way.
    assert usage._cache_read_input_tokens == 0
    # The explicit zero is preserved on the details wrapper.
    assert usage.prompt_tokens_details.cached_tokens == 0
Comment on lines +168 to +185
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Misleading zero-value test

The implementation maps the internal cache-read counter from prompt_tokens_details.cached_tokens whenever that field is present (checked via cached_tokens is not None), even if the value is 0 (litellm/types/utils.py:1559-1565). This test’s name/docstring reads like zero should be treated as “do not map”, which contradicts the actual behavior and the broader goal of treating “present but zero” differently from “missing”. Please update the test name/docstring and/or assertions to reflect the intended contract: mapping occurs when the field is present; lack of mapping should be reserved for missing details or null cached_tokens.



def test_usage_anthropic_cache_read_not_overwritten_by_prompt_details():
    """
    An explicit Anthropic cache_read_input_tokens value must win over the
    OpenAI prompt_tokens_details fallback mapping.

    Sequence: prompt_tokens_details={"cached_tokens": 300} creates the
    wrapper first, then the Anthropic mapping (around line ~1502 of
    litellm/types/utils.py) overwrites cached_tokens to 500 and sets
    _cache_read_input_tokens=500; the OpenAI fallback then sees a
    non-zero counter and leaves it alone.
    """
    from litellm.types.utils import Usage

    # Deliberately different values (300 vs 500) so the assertion can
    # tell which source won.
    params = dict(
        prompt_tokens=1000,
        completion_tokens=50,
        total_tokens=1050,
        prompt_tokens_details={"cached_tokens": 300},
        cache_read_input_tokens=500,
    )
    usage = Usage(**params)

    # The explicit Anthropic value (500) must be the final counter value.
    assert usage._cache_read_input_tokens == 500
    # The Anthropic mapping also syncs the details wrapper to 500.
    assert usage.prompt_tokens_details.cached_tokens == 500


def test_usage_deepseek_cache_read_not_overwritten_by_prompt_details():
    """
    DeepSeek's prompt_cache_hit_tokens must take precedence over the
    OpenAI prompt_tokens_details fallback mapping.
    """
    from litellm.types.utils import Usage

    usage = Usage(
        **{
            "prompt_tokens": 1000,
            "completion_tokens": 50,
            "total_tokens": 1050,
            "prompt_tokens_details": {"cached_tokens": 300},
            "prompt_cache_hit_tokens": 700,
        }
    )

    # DeepSeek's explicit 700 wins; the fallback must not clobber it with 300.
    assert usage._cache_read_input_tokens == 700


def test_usage_openai_cached_tokens_none_does_not_set_cache_read():
    """
    A prompt_tokens_details payload whose cached_tokens is explicitly None
    must leave _cache_read_input_tokens at its default of 0 — the fallback
    mapping only fires for a non-None value.
    """
    from litellm.types.utils import Usage

    details = {"audio_tokens": 0, "cached_tokens": None}
    usage = Usage(
        prompt_tokens=100,
        completion_tokens=10,
        total_tokens=110,
        prompt_tokens_details=details,
    )

    assert usage._cache_read_input_tokens == 0


def test_usage_no_prompt_tokens_details_no_error():
    """
    Constructing Usage without any prompt_tokens_details must not raise,
    and the cache-read counter must default to 0.
    """
    from litellm.types.utils import Usage

    usage = Usage(prompt_tokens=100, completion_tokens=10, total_tokens=110)

    assert usage._cache_read_input_tokens == 0


def test_usage_openai_cached_tokens_round_trip():
    """
    _cache_read_input_tokens must survive a model_dump() -> Usage()
    round-trip, as happens whenever usage objects are serialized and then
    deserialized.
    """
    from litellm.types.utils import Usage

    payload = {
        "prompt_tokens": 2829,
        "completion_tokens": 29,
        "total_tokens": 2858,
        "prompt_tokens_details": {"cached_tokens": 2816, "audio_tokens": 0},
    }
    original = Usage(**payload)
    assert original._cache_read_input_tokens == 2816

    # Serialize, then rebuild; the mapping must fire again on the dump.
    restored = Usage(**original.model_dump())

    assert restored._cache_read_input_tokens == 2816
    assert restored.prompt_tokens_details.cached_tokens == 2816
Loading