Commit 97031dc

Fix - (openrouter): move cache_control to content blocks for claude/gemini (#15345)
* test_openrouter_transform_request_with_cache_control
* fix CacheControlSupportedModels
* test_openrouter_transform_request_with_cache_control_gemini
1 parent 0a507c3 commit 97031dc

File tree

2 files changed: +268 -1 lines changed

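For context, the user-facing call this fix targets is a message-level cache_control flag sent through OpenRouter to a Claude or Gemini model. A minimal hedged sketch of such a call (the model name and message shape are taken from the tests below; assumes an OPENROUTER_API_KEY is configured):

import litellm

response = litellm.completion(
    model="openrouter/anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "user",
            "content": "what are the key terms and conditions in this agreement?",
            # Message-level flag; per this commit, OpenRouter requires it
            # inside content blocks, so transform_request now moves it there.
            "cache_control": {"type": "ephemeral"},
        }
    ],
)
print(response.choices[0].message.content)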

litellm/llms/openrouter/chat/transformation.py

Lines changed: 65 additions & 1 deletion
@@ -6,6 +6,7 @@
 Docs: https://openrouter.ai/docs/parameters
 """
 
+from enum import Enum
 from typing import Any, AsyncIterator, Iterator, List, Optional, Tuple, Union
 
 import httpx
@@ -20,6 +21,12 @@
 from ..common_utils import OpenRouterException
 
 
+class CacheControlSupportedModels(str, Enum):
+    """Models that support cache_control in content blocks."""
+
+    CLAUDE = "claude"
+    GEMINI = "gemini"
+
+
 class OpenrouterConfig(OpenAIGPTConfig):
     def map_openai_params(
         self,
@@ -48,19 +55,73 @@ def map_openai_params(
         )
         return mapped_openai_params
 
+    def _supports_cache_control_in_content(self, model: str) -> bool:
+        """
+        Check if the model supports cache_control in content blocks.
+
+        Returns:
+            bool: True if model supports cache_control (Claude or Gemini models)
+        """
+        model_lower = model.lower()
+        return any(
+            supported_model.value in model_lower
+            for supported_model in CacheControlSupportedModels
+        )
+
     def remove_cache_control_flag_from_messages_and_tools(
         self,
         model: str,
         messages: List[AllMessageValues],
         tools: Optional[List["ChatCompletionToolParam"]] = None,
     ) -> Tuple[List[AllMessageValues], Optional[List["ChatCompletionToolParam"]]]:
-        if "claude" in model.lower():  # don't remove 'cache_control' flag
+        if self._supports_cache_control_in_content(model):
             return messages, tools
         else:
             return super().remove_cache_control_flag_from_messages_and_tools(
                 model, messages, tools
             )
 
+    def _move_cache_control_to_content(
+        self, messages: List[AllMessageValues]
+    ) -> List[AllMessageValues]:
+        """
+        Move cache_control from message level to content blocks.
+        OpenRouter requires cache_control to be inside content blocks, not at message level.
+
+        When cache_control is at message level, it's added to ALL content blocks
+        to cache the entire message content.
+        """
+        transformed_messages = []
+        for message in messages:
+            message_copy = dict(message)
+            cache_control = message_copy.pop("cache_control", None)
+
+            if cache_control is not None:
+                content = message_copy.get("content")
+
+                if isinstance(content, list):
+                    # Content is already a list, add cache_control to all blocks
+                    if len(content) > 0:
+                        content_copy = []
+                        for block in content:
+                            block_copy = dict(block)
+                            block_copy["cache_control"] = cache_control
+                            content_copy.append(block_copy)
+                        message_copy["content"] = content_copy
+                else:
+                    # Content is a string, convert to structured format
+                    message_copy["content"] = [
+                        {
+                            "type": "text",
+                            "text": content,
+                            "cache_control": cache_control,
+                        }
+                    ]
+
+            transformed_messages.append(message_copy)
+
+        return transformed_messages
+
     def transform_request(
         self,
         model: str,
@@ -75,6 +136,9 @@ def transform_request(
         Returns:
            dict: The transformed request. Sent as the body of the API call.
        """
+        if self._supports_cache_control_in_content(model):
+            messages = self._move_cache_control_to_content(messages)
+
         extra_body = optional_params.pop("extra_body", {})
         response = super().transform_request(
             model, messages, optional_params, litellm_params, headers
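To see the new transformation in isolation, here is a short demo that calls the committed helper directly (it is a private method, invoked here purely for illustration; the import path follows the file shown above):

from litellm.llms.openrouter.chat.transformation import OpenrouterConfig

config = OpenrouterConfig()
# String content plus a message-level cache_control flag...
transformed = config._move_cache_control_to_content(
    [{"role": "user", "content": "Analyze this data", "cache_control": {"type": "ephemeral"}}]
)
print(transformed)
# ...comes back as a single text block carrying the flag:
# [{'role': 'user', 'content': [{'type': 'text', 'text': 'Analyze this data',
#                                'cache_control': {'type': 'ephemeral'}}]}]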

tests/test_litellm/llms/openrouter/chat/test_openrouter_chat_transformation.py

Lines changed: 203 additions & 0 deletions
@@ -114,3 +114,206 @@ def test_openrouter_cache_control_flag_removal():
         headers={},
     )
     assert transformed_request["messages"][0].get("cache_control") is None
+
+
+
+def test_openrouter_transform_request_with_cache_control():
+    """
+    Test transform_request moves cache_control from message level to content blocks (string content).
+
+    Input:
+        {
+            "role": "user",
+            "content": "what are the key terms...",
+            "cache_control": {"type": "ephemeral"}
+        }
+
+    Expected Output:
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "what are the key terms...",
+                    "cache_control": {"type": "ephemeral"}
+                }
+            ]
+        }
+    """
+    import json
+    config = OpenrouterConfig()
+
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents."
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement"
+                }
+            ]
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+            "cache_control": {"type": "ephemeral"}
+        }
+    ]
+
+    transformed_request = config.transform_request(
+        model="openrouter/anthropic/claude-3-5-sonnet-20240620",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+        headers={},
+    )
+
+    print("\n=== Transformed Request ===")
+    print(json.dumps(transformed_request, indent=4, default=str))
+
+    assert "messages" in transformed_request
+    assert len(transformed_request["messages"]) == 2
+
+    user_message = transformed_request["messages"][1]
+    assert user_message["role"] == "user"
+    assert isinstance(user_message["content"], list)
+    assert user_message["content"][0]["type"] == "text"
+    assert user_message["content"][0]["cache_control"] == {"type": "ephemeral"}
+
+
+def test_openrouter_transform_request_with_cache_control_list_content():
+    """
+    Test transform_request moves cache_control to all content blocks when content is already a list.
+
+    Input:
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a historian..."},
+                {"type": "text", "text": "HUGE TEXT BODY"}
+            ],
+            "cache_control": {"type": "ephemeral"}
+        }
+
+    Expected Output:
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are a historian...",
+                    "cache_control": {"type": "ephemeral"}
+                },
+                {
+                    "type": "text",
+                    "text": "HUGE TEXT BODY",
+                    "cache_control": {"type": "ephemeral"}
+                }
+            ]
+        }
+    """
+    import json
+    config = OpenrouterConfig()
+
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are a historian studying the fall of the Roman Empire."
+                },
+                {
+                    "type": "text",
+                    "text": "HUGE TEXT BODY"
+                }
+            ],
+            "cache_control": {"type": "ephemeral"}
+        },
+        {
+            "role": "user",
+            "content": "What triggered the collapse?"
+        }
+    ]
+
+    transformed_request = config.transform_request(
+        model="openrouter/anthropic/claude-3-5-sonnet-20240620",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+        headers={},
+    )
+
+    print("\n=== Transformed Request (List Content) ===")
+    print(json.dumps(transformed_request, indent=4, default=str))
+
+    assert "messages" in transformed_request
+    assert len(transformed_request["messages"]) == 2
+
+    system_message = transformed_request["messages"][0]
+    assert system_message["role"] == "system"
+    assert isinstance(system_message["content"], list)
+    assert len(system_message["content"]) == 2
+    assert system_message["content"][0]["cache_control"] == {"type": "ephemeral"}
+    assert system_message["content"][1]["cache_control"] == {"type": "ephemeral"}
+    assert "cache_control" not in system_message
+
+
+def test_openrouter_transform_request_with_cache_control_gemini():
+    """
+    Test transform_request moves cache_control to content blocks for Gemini models.
+
+    Input:
+        {
+            "role": "user",
+            "content": "Analyze this data",
+            "cache_control": {"type": "ephemeral"}
+        }
+
+    Expected Output:
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Analyze this data",
+                    "cache_control": {"type": "ephemeral"}
+                }
+            ]
+        }
+    """
+    import json
+    config = OpenrouterConfig()
+
+    messages = [
+        {
+            "role": "user",
+            "content": "Analyze this data",
+            "cache_control": {"type": "ephemeral"}
+        }
+    ]
+
+    transformed_request = config.transform_request(
+        model="openrouter/google/gemini-2.0-flash-exp:free",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+        headers={},
+    )
+
+    print("\n=== Transformed Request (Gemini) ===")
+    print(json.dumps(transformed_request, indent=4, default=str))
+
+    assert "messages" in transformed_request
+    assert len(transformed_request["messages"]) == 1
+
+    user_message = transformed_request["messages"][0]
+    assert user_message["role"] == "user"
+    assert isinstance(user_message["content"], list)
+    assert user_message["content"][0]["type"] == "text"
+    assert user_message["content"][0]["cache_control"] == {"type": "ephemeral"}

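The new tests can be run on their own with standard pytest selection (-k filters by test name, -s surfaces the print output):

pytest tests/test_litellm/llms/openrouter/chat/test_openrouter_chat_transformation.py -k cache_control -s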