fix(usage): map OpenAI cached_tokens to _cache_read_input_tokens #20878
```diff
@@ -127,3 +127,95 @@ def test_usage_completion_tokens_details_text_tokens():
     # Verify round-trip serialization works
     new_usage = Usage(**dump_result)
     assert new_usage.completion_tokens_details.text_tokens == 12
+
+
+def test_usage_openai_cached_tokens_populates_cache_read_input_tokens():
+    """
+    Test that OpenAI's prompt_tokens_details.cached_tokens populates
+    _cache_read_input_tokens. This is the fix for GH issue #19684.
+
+    OpenAI returns cached tokens in prompt_tokens_details.cached_tokens,
+    but _cache_read_input_tokens was not being set, causing the UI to
+    show "Cache Read Tokens: 0".
+    """
+    from litellm.types.utils import Usage
+
+    # Simulate OpenAI response usage (exactly what comes from response.model_dump())
+    openai_usage = {
+        "prompt_tokens": 2829,
+        "completion_tokens": 29,
+        "total_tokens": 2858,
+        "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "audio_tokens": 0,
+            "reasoning_tokens": 0,
+            "rejected_prediction_tokens": 0,
+        },
+        "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 2816,
+        },
+    }
+
+    usage = Usage(**openai_usage)
+
+    # _cache_read_input_tokens should be populated from prompt_tokens_details.cached_tokens
+    assert usage._cache_read_input_tokens == 2816
+    # prompt_tokens_details.cached_tokens should also be preserved
+    assert usage.prompt_tokens_details.cached_tokens == 2816
+
+
+def test_usage_openai_cached_tokens_zero_does_not_set_cache_read():
+    """
+    When OpenAI returns cached_tokens=0, _cache_read_input_tokens should stay 0.
+    """
+    from litellm.types.utils import Usage
+
+    openai_usage = {
+        "prompt_tokens": 100,
+        "completion_tokens": 10,
+        "total_tokens": 110,
+        "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 0,
+        },
+    }
+
+    usage = Usage(**openai_usage)
+    assert usage._cache_read_input_tokens == 0
```
Contributor comment on lines +168 to +185:

> **Misleading zero-value test.** The implementation maps the internal cache-read counter from …
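To make the reviewer's concern concrete: `_cache_read_input_tokens` already defaults to 0 (the last test in this diff asserts exactly that when no `prompt_tokens_details` is passed at all), so an `== 0` assertion cannot tell whether the new mapping ran or was skipped. A minimal sketch, assuming only the behaviour shown in the tests in this diff:

```python
from litellm.types.utils import Usage

# Both constructions satisfy "assert usage._cache_read_input_tokens == 0",
# so the zero-value test passes with or without the cached_tokens mapping.
usage_default = Usage(prompt_tokens=100, completion_tokens=10, total_tokens=110)
usage_zero_cached = Usage(
    prompt_tokens=100,
    completion_tokens=10,
    total_tokens=110,
    prompt_tokens_details={"audio_tokens": 0, "cached_tokens": 0},
)
assert usage_default._cache_read_input_tokens == 0
assert usage_zero_cached._cache_read_input_tokens == 0
```

The remaining added tests continue in the diff below.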
```diff
+
+
+def test_usage_anthropic_cache_read_not_overwritten_by_prompt_details():
+    """
+    When Anthropic explicitly passes cache_read_input_tokens, the OpenAI
+    fallback mapping should NOT overwrite it.
+    """
+    from litellm.types.utils import Usage
+
+    # Anthropic passes cache_read_input_tokens explicitly in **params
+    usage = Usage(
+        prompt_tokens=1000,
+        completion_tokens=50,
+        total_tokens=1050,
+        prompt_tokens_details={"cached_tokens": 500},
+        cache_read_input_tokens=500,
+    )
+
+    # Should use the explicit Anthropic value, not overwrite it
+    assert usage._cache_read_input_tokens == 500
+    assert usage.prompt_tokens_details.cached_tokens == 500
+
+
+def test_usage_no_prompt_tokens_details_no_error():
+    """
+    When there's no prompt_tokens_details at all, nothing should break.
+    """
+    from litellm.types.utils import Usage
+
+    usage = Usage(
+        prompt_tokens=100,
+        completion_tokens=10,
+        total_tokens=110,
+    )
+
+    assert usage._cache_read_input_tokens == 0
```
Review comment:

> **Cache tokens lost when None.** `Usage.__init__` only sets `_cache_read_input_tokens` from `prompt_tokens_details.cached_tokens` when it is `> 0` (litellm/types/utils.py:1559-1566). If OpenAI returns a valid `cached_tokens` value of `0` (or the UI expects to reflect that the field was present), this mapping won't run and you can't distinguish "absent" vs "present but zero". Since the PR intent is to map OpenAI's field, consider setting `_cache_read_input_tokens` when `cached_tokens is not None` (and still keep the "don't overwrite provider-set value" guard).
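To make that suggestion concrete, here is a minimal standalone sketch; `resolve_cache_read_input_tokens` is a hypothetical helper, not litellm's actual `Usage.__init__` logic, and it assumes the details arrive as a plain dict, as in the tests above:

```python
from typing import Optional


def resolve_cache_read_input_tokens(
    prompt_tokens_details: Optional[dict],
    cache_read_input_tokens: Optional[int],
) -> int:
    """Hypothetical helper mirroring the reviewer's suggestion: prefer an
    explicitly provided cache_read_input_tokens (e.g. from Anthropic), and
    otherwise map OpenAI's prompt_tokens_details.cached_tokens whenever the
    field is present, using `is not None` rather than `> 0` so that a
    present-but-zero value still goes through the mapping."""
    if cache_read_input_tokens is not None:
        # Provider-set value wins; the fallback mapping never overwrites it.
        return cache_read_input_tokens
    if prompt_tokens_details is not None:
        cached = prompt_tokens_details.get("cached_tokens")
        if cached is not None:
            return cached
    return 0


# The same scenarios the tests in the diff cover.
assert resolve_cache_read_input_tokens({"cached_tokens": 2816}, None) == 2816
assert resolve_cache_read_input_tokens({"cached_tokens": 0}, None) == 0
assert resolve_cache_read_input_tokens({"cached_tokens": 500}, 500) == 500
assert resolve_cache_read_input_tokens(None, None) == 0
```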