
Commit 145d38f

test(openai): add tests for prompt_cache_key parameter and update docs (#32363)
Introduce tests to validate the behavior and inclusion of the `prompt_cache_key` parameter in request payloads for the `ChatOpenAI` model, and document prompt caching in the `ChatOpenAI` docstring.
1 parent 68c70da · commit 145d38f

File tree

3 files changed: +152 −0 lines changed


libs/partners/openai/langchain_openai/chat_models/base.py

Lines changed: 25 additions & 0 deletions
@@ -2731,6 +2731,31 @@ class Joke(BaseModel):
         Always use ``extra_body`` for custom parameters, **not** ``model_kwargs``.
         Using ``model_kwargs`` for non-OpenAI parameters will cause API errors.
 
+    .. dropdown:: Prompt caching optimization
+
+        For high-volume applications with repetitive prompts, use ``prompt_cache_key``
+        per-invocation to improve cache hit rates and reduce costs:
+
+        .. code-block:: python
+
+            llm = ChatOpenAI(model="gpt-4o-mini")
+
+            response = llm.invoke(
+                messages,
+                prompt_cache_key="example-key-a",  # Routes to same machine for cache hits
+            )
+
+            customer_response = llm.invoke(messages, prompt_cache_key="example-key-b")
+            support_response = llm.invoke(messages, prompt_cache_key="example-key-c")
+
+            # Dynamic cache keys based on context
+            cache_key = f"example-key-{dynamic_suffix}"
+            response = llm.invoke(messages, prompt_cache_key=cache_key)
+
+        Cache keys help ensure requests with the same prompt prefix are routed to
+        machines with existing cache, providing cost reduction and latency improvement on
+        cached tokens.
+
     """  # noqa: E501
 
     max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
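
The docstring example above assumes a `messages` list and a `dynamic_suffix` variable in the caller's scope. As a rough illustration of how a caller might confirm that a cache key is actually producing hits, the sketch below (not part of this commit) re-sends the same long prompt with the same key and inspects the cached-token count that langchain-core surfaces in `usage_metadata`; the `cache_read` detail key and the roughly 1,024-token caching threshold are assumptions based on OpenAI's documented prompt-caching behavior, not something this diff asserts.

# Sketch, not from this commit: observing whether a prompt_cache_key yields cache hits.
from langchain_core.messages import HumanMessage

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

# A long, repeated prefix; OpenAI only caches prompts past a minimum length
# (roughly 1,024 tokens), so short prompts will report zero cached tokens.
messages = [
    HumanMessage("Shared support-agent preamble. " * 400 + "How do I reset my password?")
]

first = llm.invoke(messages, prompt_cache_key="example-key-a")
second = llm.invoke(messages, prompt_cache_key="example-key-a")

# langchain-core exposes OpenAI's cached_tokens count as input_token_details["cache_read"];
# expect a value > 0 on the second call if the shared prefix was cached.
if second.usage_metadata:
    print(second.usage_metadata.get("input_token_details", {}))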

libs/partners/openai/tests/integration_tests/chat_models/test_base.py

Lines changed: 43 additions & 0 deletions
@@ -1110,3 +1110,46 @@ class ResponseFormat(BaseModel):
     assert isinstance(aggregated["raw"], AIMessage)
     assert aggregated["raw"].tool_calls
     assert aggregated["parsed"] is None
+
+
+@pytest.mark.scheduled
+def test_prompt_cache_key_invoke() -> None:
+    """Test that prompt_cache_key works with invoke calls."""
+    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20)
+    messages = [HumanMessage("Say hello")]
+
+    # Test that invoke works with prompt_cache_key parameter
+    response = chat.invoke(messages, prompt_cache_key="integration-test-v1")
+
+    assert isinstance(response, AIMessage)
+    assert isinstance(response.content, str)
+    assert len(response.content) > 0
+
+    # Test that subsequent call with same cache key also works
+    response2 = chat.invoke(messages, prompt_cache_key="integration-test-v1")
+
+    assert isinstance(response2, AIMessage)
+    assert isinstance(response2.content, str)
+    assert len(response2.content) > 0
+
+
+@pytest.mark.scheduled
+def test_prompt_cache_key_usage_methods_integration() -> None:
+    """Integration test for prompt_cache_key usage methods."""
+    messages = [HumanMessage("Say hi")]
+
+    # Test keyword argument method
+    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10)
+    response = chat.invoke(messages, prompt_cache_key="integration-test-v1")
+    assert isinstance(response, AIMessage)
+    assert isinstance(response.content, str)
+
+    # Test model-level via model_kwargs
+    chat_model_level = ChatOpenAI(
+        model="gpt-4o-mini",
+        max_completion_tokens=10,
+        model_kwargs={"prompt_cache_key": "integration-model-level-v1"},
+    )
+    response_model_level = chat_model_level.invoke(messages)
+    assert isinstance(response_model_level, AIMessage)
+    assert isinstance(response_model_level.content, str)
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
+"""Unit tests for prompt_cache_key parameter."""
+
+from langchain_core.messages import HumanMessage
+
+from langchain_openai import ChatOpenAI
+
+
+def test_prompt_cache_key_parameter_inclusion() -> None:
+    """Test that prompt_cache_key parameter is properly included in request payload."""
+    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10)
+    messages = [HumanMessage("Hello")]
+
+    payload = chat._get_request_payload(messages, prompt_cache_key="test-cache-key")
+    assert "prompt_cache_key" in payload
+    assert payload["prompt_cache_key"] == "test-cache-key"
+
+
+def test_prompt_cache_key_parameter_exclusion() -> None:
+    """Test that prompt_cache_key parameter behavior matches OpenAI API."""
+    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10)
+    messages = [HumanMessage("Hello")]
+
+    # Test with explicit None (OpenAI should accept None values (marked Optional))
+    payload = chat._get_request_payload(messages, prompt_cache_key=None)
+    assert "prompt_cache_key" in payload
+    assert payload["prompt_cache_key"] is None
+
+
+def test_prompt_cache_key_per_call() -> None:
+    """Test that prompt_cache_key can be passed per-call with different values."""
+    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10)
+    messages = [HumanMessage("Hello")]
+
+    # Test different cache keys per call
+    payload1 = chat._get_request_payload(messages, prompt_cache_key="cache-v1")
+    payload2 = chat._get_request_payload(messages, prompt_cache_key="cache-v2")
+
+    assert payload1["prompt_cache_key"] == "cache-v1"
+    assert payload2["prompt_cache_key"] == "cache-v2"
+
+    # Test dynamic cache key assignment
+    cache_keys = ["customer-v1", "support-v1", "feedback-v1"]
+
+    for cache_key in cache_keys:
+        payload = chat._get_request_payload(messages, prompt_cache_key=cache_key)
+        assert "prompt_cache_key" in payload
+        assert payload["prompt_cache_key"] == cache_key
+
+
+def test_prompt_cache_key_model_kwargs() -> None:
+    """Test prompt_cache_key via model_kwargs and method precedence."""
+    messages = [HumanMessage("Hello world")]
+
+    # Test model-level via model_kwargs
+    chat = ChatOpenAI(
+        model="gpt-4o-mini",
+        max_completion_tokens=10,
+        model_kwargs={"prompt_cache_key": "model-level-cache"},
+    )
+    payload = chat._get_request_payload(messages)
+    assert "prompt_cache_key" in payload
+    assert payload["prompt_cache_key"] == "model-level-cache"
+
+    # Test that per-call cache key overrides model-level
+    payload_override = chat._get_request_payload(
+        messages, prompt_cache_key="per-call-cache"
+    )
+    assert payload_override["prompt_cache_key"] == "per-call-cache"
+
+
+def test_prompt_cache_key_responses_api() -> None:
+    """Test that prompt_cache_key works with Responses API."""
+    chat = ChatOpenAI(
+        model="gpt-4o-mini", use_responses_api=True, max_completion_tokens=10
+    )
+
+    messages = [HumanMessage("Hello")]
+    payload = chat._get_request_payload(
+        messages, prompt_cache_key="responses-api-cache-v1"
+    )
+
+    # prompt_cache_key should be present regardless of API type
+    assert "prompt_cache_key" in payload
+    assert payload["prompt_cache_key"] == "responses-api-cache-v1"
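
test_prompt_cache_key_model_kwargs covers model-level defaults via `model_kwargs` and per-call overrides; a third pattern, not exercised by this commit, is to pin the key once with `Runnable.bind`, which forwards it as a per-call kwarg on every invocation. A minimal sketch under that assumption (the key name "support-flow-v1" is illustrative):

# Sketch, not from this commit: binding a prompt_cache_key once so every call reuses it.
from langchain_core.messages import HumanMessage

from langchain_openai import ChatOpenAI

cached_llm = ChatOpenAI(model="gpt-4o-mini").bind(prompt_cache_key="support-flow-v1")

# Both invocations forward prompt_cache_key="support-flow-v1" into the request payload,
# the same way the per-call keyword argument does in the tests above.
cached_llm.invoke([HumanMessage("How do I reset my password?")])
cached_llm.invoke([HumanMessage("How do I close my account?")])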
