forked from krafton-ai/KIRA
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanthropic_caching.py
More file actions
62 lines (53 loc) · 2.44 KB
/
anthropic_caching.py
File metadata and controls
62 lines (53 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import copy
from typing import Any, Dict, List
from litellm import Message
def _mark_content_ephemeral(content: Any) -> Any:
    """Attach a single ephemeral cache breakpoint to a message's content.

    Args:
        content: The ``content`` value of a chat message.

    Returns:
        For a string, a new one-element list holding a text block that
        carries ``cache_control``. For a list, the same list object, mutated
        in place so that its last typed dict block carries ``cache_control``.
        Any other value is returned unchanged.
    """
    if isinstance(content, str):
        return [
            {
                "type": "text",
                "text": content,
                "cache_control": {"type": "ephemeral"},
            }
        ]
    if isinstance(content, list):
        # Only the LAST eligible block is marked. A breakpoint covers the
        # whole prefix up to and including its block, so one per message is
        # enough, and marking every block could exceed the provider's limit
        # on the number of cache breakpoints in a single request.
        for item in reversed(content):
            if isinstance(item, dict) and "type" in item:
                item["cache_control"] = {"type": "ephemeral"}
                break
    return content


def add_anthropic_caching(
    messages: "List[Dict[str, Any] | Message]", model_name: str
) -> "List[Dict[str, Any] | Message]":
    """Add ephemeral caching to the most recent messages for Anthropic models.

    The annotations are quoted so they are not evaluated when the function is
    defined; the declared types are unchanged.

    Args:
        messages: Chat messages, either plain dicts or objects exposing a
            ``content`` attribute.
        model_name: Model identifier. Caching is applied only when it
            contains "anthropic" or "claude" (case-insensitive).

    Returns:
        ``messages`` itself (same object, untouched) for non-Anthropic models.
        Otherwise a deep copy in which each of the last three messages has
        exactly one ``cache_control`` breakpoint: string content is wrapped
        in a single text block, and list content has its last typed block
        marked. With at most three messages marked, the request stays within
        Anthropic's limit of four cache breakpoints. Messages whose content
        is neither a string nor a list are left as they are.
    """
    lowered_name = model_name.lower()
    if "anthropic" not in lowered_name and "claude" not in lowered_name:
        return messages

    # Work on a deep copy so the caller's messages are never mutated.
    cached_messages = copy.deepcopy(messages)

    # A negative slice handles lists shorter than three messages (and the
    # empty list) without any index arithmetic.
    for msg in cached_messages[-3:]:
        if isinstance(msg, dict):
            original_content = msg.get("content")
            updated_content = _mark_content_ephemeral(original_content)
            # Reassign only when a new object was built (the string case);
            # lists are mutated in place and a missing key stays missing.
            if updated_content is not original_content:
                msg["content"] = updated_content
        elif hasattr(msg, "content"):
            original_content = msg.content
            updated_content = _mark_content_ephemeral(original_content)
            if updated_content is not original_content:
                msg.content = updated_content  # type: ignore

    return cached_messages