
Commit 2f42c80

[Fix] x-litellm-cache-key header not being returned on cache hit (#15348)
* fix: x-cache-key
* test_cache_key_in_hidden_params_acompletion
* fix: remove_cache_control_flag_from_messages_and_tools

1 parent: 97031dc
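
For context, x-litellm-cache-key is the response header a LiteLLM proxy client can read to see which cache entry served a request. A minimal sketch of checking it from a client, assuming a proxy running locally with caching enabled (the URL and API key below are illustrative placeholders, not part of this commit):

    import requests

    # Assumed local proxy endpoint and virtual key -- placeholders.
    PROXY_URL = "http://localhost:4000/v1/chat/completions"
    API_KEY = "sk-1234"

    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hello"}],
    }

    # Send the same request twice; the second should be served from cache.
    for attempt in range(2):
        resp = requests.post(
            PROXY_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json=payload,
        )
        # After this fix, cache hits should also carry the cache-key header.
        print(attempt, resp.headers.get("x-litellm-cache-key"))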

File tree

3 files changed (+62, −15 lines)


litellm/caching/caching_handler.py

Lines changed: 5 additions & 11 deletions
@@ -14,10 +14,10 @@
 In each method it will call the appropriate method from caching.py
 """

-import time
 import asyncio
 import datetime
 import inspect
+import time
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -62,12 +62,10 @@
 LiteLLMLoggingObj = Any


-from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
-
-
 from litellm.litellm_core_utils.core_helpers import (
-    _get_parent_otel_span_from_kwargs,
+    _get_parent_otel_span_from_kwargs,
 )
+from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper


 class CachingHandlerResponse(BaseModel):
@@ -214,9 +212,7 @@ async def _async_get_cache(
                 end_time=end_time,
                 cache_hit=cache_hit,
             )
-            cache_key = litellm.cache._get_preset_cache_key_from_kwargs(
-                **kwargs
-            )
+            cache_key = litellm.cache.get_cache_key(**kwargs)
             if (
                 isinstance(cached_result, BaseModel)
                 or isinstance(cached_result, CustomStreamWrapper)
@@ -330,9 +326,7 @@ def _sync_get_cache(
                 end_time=end_time,
                 cache_hit=cache_hit
             )
-            cache_key = litellm.cache._get_preset_cache_key_from_kwargs(
-                **kwargs
-            )
+            cache_key = litellm.cache.get_cache_key(**kwargs)
             if (
                 isinstance(cached_result, BaseModel)
                 or isinstance(cached_result, CustomStreamWrapper)
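
The substantive change here swaps the private _get_preset_cache_key_from_kwargs lookup, which returns a key only when an earlier step stashed one in kwargs, for the public get_cache_key helper, which can also derive the key from the request itself. A rough, self-contained sketch of that distinction (the helper bodies below are hypothetical stand-ins, not litellm's implementation):

    import hashlib
    import json

    def _get_preset_cache_key_from_kwargs(**kwargs):
        # Hypothetical stand-in: only yields a key if one was precomputed
        # and stored in kwargs -- on some paths this was None, so the
        # x-litellm-cache-key header was never populated on a hit.
        return kwargs.get("litellm_params", {}).get("preset_cache_key")

    def get_cache_key(**kwargs):
        # Hypothetical stand-in: prefer the preset key, otherwise derive a
        # deterministic key from the request parameters, so a cache hit
        # always has a key to report.
        preset = _get_preset_cache_key_from_kwargs(**kwargs)
        if preset is not None:
            return preset
        payload = json.dumps(kwargs, sort_keys=True, default=str)
        return hashlib.sha256(payload.encode()).hexdigest()

    print(get_cache_key(model="gpt-3.5-turbo",
                        messages=[{"role": "user", "content": "hi"}]))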

litellm/llms/openai/chat/gpt_transformation.py

Lines changed: 4 additions & 4 deletions
@@ -397,13 +397,13 @@ def remove_cache_control_flag_from_messages_and_tools(
         )
         from litellm.types.llms.openai import ChatCompletionToolParam

-        for message in messages:
-            message = cast(
+        for i, message in enumerate(messages):
+            messages[i] = cast(
                 AllMessageValues, filter_value_from_dict(message, "cache_control")  # type: ignore
             )
         if tools is not None:
-            for tool in tools:
-                tool = cast(
+            for i, tool in enumerate(tools):
+                tools[i] = cast(
                     ChatCompletionToolParam,
                     filter_value_from_dict(tool, "cache_control"),  # type: ignore
                 )
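
This hunk fixes a classic Python pitfall: rebinding the loop variable (message = cast(...)) only changes a local name, so the filtered dicts were discarded and the cache_control flags survived in the original lists. Assigning through the index (messages[i] = ...) mutates the list in place. A standalone illustration of the difference:

    items = [{"text": "hi", "cache_control": {"type": "ephemeral"}}]

    # Before the fix: the loop variable is rebound, the list is untouched.
    for item in items:
        item = {k: v for k, v in item.items() if k != "cache_control"}
    print(items)  # [{'text': 'hi', 'cache_control': {'type': 'ephemeral'}}]

    # After the fix: index assignment writes the filtered dict back.
    for i, item in enumerate(items):
        items[i] = {k: v for k, v in item.items() if k != "cache_control"}
    print(items)  # [{'text': 'hi'}]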

tests/local_testing/test_caching.py

Lines changed: 53 additions & 0 deletions
@@ -2765,3 +2765,56 @@ def test_caching_thinking_args_hit():  # test in memory cache
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")
         pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.asyncio
+async def test_cache_key_in_hidden_params_acompletion():
+    """
+    Test that cache_key is present in _hidden_params on cache hits for acompletion.
+
+    Validates fix for missing x-litellm-cache-key header on proxy cache hits.
+    """
+    litellm.cache = Cache(
+        type="redis",
+        host=os.environ["REDIS_HOST"],
+        port=os.environ["REDIS_PORT"],
+        password=os.environ["REDIS_PASSWORD"],
+    )
+
+    unique_content = f"test cache key hidden params {uuid.uuid4()}"
+    messages = [{"role": "user", "content": unique_content}]
+
+    # First call - cache miss
+    response1 = await litellm.acompletion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        mock_response="test response",
+        caching=True,
+    )
+
+    print(f"Response 1 _hidden_params: {response1._hidden_params}")
+    assert response1._hidden_params.get("cache_hit") is not True
+
+    await asyncio.sleep(0.5)
+
+    # Second call - cache hit
+    response2 = await litellm.acompletion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        mock_response="test response",
+        caching=True,
+    )
+
+    print(f"Response 2 _hidden_params: {response2._hidden_params}")
+
+    # Verify cache hit occurred
+    assert response2._hidden_params.get("cache_hit") is True
+
+    # Verify cache_key is present in _hidden_params
+    assert "cache_key" in response2._hidden_params
+    assert response2._hidden_params["cache_key"] is not None
+
+    # Verify both responses have same ID (cache hit)
+    assert response1.id == response2.id
+
+    litellm.cache = None
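
The new test requires a live Redis instance configured via REDIS_HOST, REDIS_PORT, and REDIS_PASSWORD. For a quick check of the same assertions without Redis, a variant against litellm's in-memory cache might look like the sketch below (assuming the "local" cache type populates _hidden_params the same way):

    import asyncio
    import uuid

    import litellm
    from litellm import Cache

    async def main():
        litellm.cache = Cache(type="local")  # in-memory cache, no Redis needed
        messages = [{"role": "user", "content": f"probe {uuid.uuid4()}"}]

        # First call warms the cache; the second should hit it.
        r1 = await litellm.acompletion(
            model="gpt-3.5-turbo", messages=messages,
            mock_response="ok", caching=True,
        )
        r2 = await litellm.acompletion(
            model="gpt-3.5-turbo", messages=messages,
            mock_response="ok", caching=True,
        )

        assert r2._hidden_params.get("cache_hit") is True
        assert r2._hidden_params.get("cache_key") is not None
        litellm.cache = None

    asyncio.run(main())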
