Skip to content

Commit 30b7e35

Browse files
fix(llmobs): support multiple system prompts for anthropic [backport 3.3] (#12964)
Backport 4c46271 from #12958 to 3.3. Previously, we assumed the system prompt was just a string. However, it can actually be a list of content blocks (similar to the rest of the Anthropic messages). In this case, the input was being dropped since we were setting `content` to be the list of messages. To fix this, we prepend the system prompt to the list of messages and have it undergo the same processing logic that we use for the rest of the messages. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: lievan <[email protected]>
1 parent f213b0f commit 30b7e35

File tree

4 files changed

+164
-2
lines changed

4 files changed

+164
-2
lines changed

ddtrace/llmobs/_integrations/anthropic.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Iterable
55
from typing import List
66
from typing import Optional
7+
from typing import Union
78
from urllib.parse import urlparse
89

910
from ddtrace.internal.logger import get_logger
@@ -81,7 +82,7 @@ def _llmobs_set_tags(
8182
}
8283
)
8384

84-
def _extract_input_message(self, messages, system_prompt=None):
85+
def _extract_input_message(self, messages, system_prompt: Optional[Union[str, List[Dict[str, Any]]]] = None):
8586
"""Extract input messages from the stored prompt.
8687
Anthropic allows for messages and multiple texts in a message, which requires some special casing.
8788
"""
@@ -90,7 +91,8 @@ def _extract_input_message(self, messages, system_prompt=None):
9091

9192
input_messages = []
9293
if system_prompt is not None:
93-
input_messages.append({"content": system_prompt, "role": "system"})
94+
messages = [{"content": system_prompt, "role": "system"}] + messages
95+
9496
for message in messages:
9597
if not isinstance(message, dict):
9698
log.warning("Anthropic message input must be a list of message param dicts.")
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
LLM Observability: This fix resolves an issue with Anthropic LLM spans where multiple system prompts caused input messages to be dropped.
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
interactions:
2+
- request:
3+
body: '{"max_tokens":15,"messages":[{"role":"user","content":[{"type":"text","text":"Hello,
4+
I am looking for information about some books!"},{"type":"text","text":"What
5+
is the best selling book?"}]}],"model":"claude-3-opus-20240229","system":[{"type":"text","text":"You
6+
are an AI assistant tasked with analyzing literary works."},{"type":"text","text":"only
7+
respond in all caps","cache_control":{"type":"ephemeral"}}],"temperature":0.8}'
8+
headers:
9+
accept:
10+
- application/json
11+
accept-encoding:
12+
- gzip, deflate
13+
anthropic-version:
14+
- '2023-06-01'
15+
connection:
16+
- keep-alive
17+
content-length:
18+
- '429'
19+
content-type:
20+
- application/json
21+
host:
22+
- api.anthropic.com
23+
user-agent:
24+
- Anthropic/Python 0.40.0
25+
x-stainless-arch:
26+
- arm64
27+
x-stainless-async:
28+
- 'false'
29+
x-stainless-lang:
30+
- python
31+
x-stainless-os:
32+
- MacOS
33+
x-stainless-package-version:
34+
- 0.40.0
35+
x-stainless-retry-count:
36+
- '0'
37+
x-stainless-runtime:
38+
- CPython
39+
x-stainless-runtime-version:
40+
- 3.10.13
41+
method: POST
42+
uri: https://api.anthropic.com/v1/messages
43+
response:
44+
body:
45+
string: !!binary |
46+
H4sIAAAAAAAAA2SOXWuDQBBF/4qd5xXUpA/dt2DMhyQV1BRKKbJZJ6nV7Fp3tqSI/70YKrT0aeCe
47+
c4fbQ1UCh4s5F56/X6/Es3+K4zrHU71/X9LxMxbAgL5aHC00RpwRGHS6GQNhTGVIKAIGF11iAxxk
48+
I2yJ7szVrTVu4AVzLwgegIHUilAR8Jd+ekh4Hau3w2ET7XaJk2+iNLpzFmGYpMvt49rJE+dpkW6T
49+
Q+ZkySENo4yNEgyvDAzptuhQGK3GfeJakK5RGfhBBj8sKonAlW0aBva2n/dQqdbSJPP5jIEU8g0L
50+
2aGgSqvir+BNvENR/mfa0u/Evx+GbwAAAP//AwDHLVn/WQEAAA==
51+
headers:
52+
CF-RAY:
53+
- 927953253c178d1b-BOS
54+
Connection:
55+
- keep-alive
56+
Content-Encoding:
57+
- gzip
58+
Content-Type:
59+
- application/json
60+
Date:
61+
- Fri, 28 Mar 2025 18:52:29 GMT
62+
Server:
63+
- cloudflare
64+
Transfer-Encoding:
65+
- chunked
66+
X-Robots-Tag:
67+
- none
68+
anthropic-organization-id:
69+
- 4257e925-ee99-4ee8-9c62-8e53716d5203
70+
anthropic-ratelimit-input-tokens-limit:
71+
- '400000'
72+
anthropic-ratelimit-input-tokens-remaining:
73+
- '400000'
74+
anthropic-ratelimit-input-tokens-reset:
75+
- '2025-03-28T18:52:28Z'
76+
anthropic-ratelimit-output-tokens-limit:
77+
- '80000'
78+
anthropic-ratelimit-output-tokens-remaining:
79+
- '80000'
80+
anthropic-ratelimit-output-tokens-reset:
81+
- '2025-03-28T18:52:28Z'
82+
anthropic-ratelimit-requests-limit:
83+
- '4000'
84+
anthropic-ratelimit-requests-remaining:
85+
- '3999'
86+
anthropic-ratelimit-requests-reset:
87+
- '2025-03-28T18:52:28Z'
88+
anthropic-ratelimit-tokens-limit:
89+
- '480000'
90+
anthropic-ratelimit-tokens-remaining:
91+
- '480000'
92+
anthropic-ratelimit-tokens-reset:
93+
- '2025-03-28T18:52:28Z'
94+
cf-cache-status:
95+
- DYNAMIC
96+
request-id:
97+
- req_014iJLarFdJBNdJkwwRKsqGp
98+
via:
99+
- 1.1 google
100+
status:
101+
code: 200
102+
message: OK
103+
version: 1

tests/contrib/anthropic/test_anthropic_llmobs.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,59 @@ def test_completion(self, anthropic, ddtrace_global_config, mock_llmobs_writer,
103103
)
104104
)
105105

106+
def test_completion_with_multiple_system_prompts(
107+
self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr
108+
):
109+
"""Ensure llmobs records are emitted for completion endpoints with a list of messages as the system prompt.
110+
111+
Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation.
112+
"""
113+
llm = anthropic.Anthropic()
114+
with request_vcr.use_cassette("anthropic_completion_multi_system_prompt.yaml"):
115+
llm.messages.create(
116+
model="claude-3-opus-20240229",
117+
max_tokens=15,
118+
temperature=0.8,
119+
system=[
120+
{
121+
"type": "text",
122+
"text": "You are an AI assistant tasked with analyzing literary works.",
123+
},
124+
{"type": "text", "text": "only respond in all caps", "cache_control": {"type": "ephemeral"}},
125+
],
126+
messages=[
127+
{
128+
"role": "user",
129+
"content": [
130+
{"type": "text", "text": "Hello, I am looking for information about some books!"},
131+
{"type": "text", "text": "What is the best selling book?"},
132+
],
133+
}
134+
],
135+
)
136+
span = mock_tracer.pop_traces()[0][0]
137+
assert mock_llmobs_writer.enqueue.call_count == 1
138+
mock_llmobs_writer.enqueue.assert_called_with(
139+
_expected_llmobs_llm_span_event(
140+
span,
141+
model_name="claude-3-opus-20240229",
142+
model_provider="anthropic",
143+
input_messages=[
144+
{
145+
"content": "You are an AI assistant tasked with analyzing literary works.",
146+
"role": "system",
147+
},
148+
{"content": "only respond in all caps", "role": "system"},
149+
{"content": "Hello, I am looking for information about some books!", "role": "user"},
150+
{"content": "What is the best selling book?", "role": "user"},
151+
],
152+
output_messages=[{"content": "HELLO THERE! ACCORDING TO VARIOUS SOURCES, THE", "role": "assistant"}],
153+
metadata={"temperature": 0.8, "max_tokens": 15.0},
154+
token_metrics={"input_tokens": 43, "output_tokens": 15, "total_tokens": 58},
155+
tags={"ml_app": "<ml-app-name>", "service": "tests.contrib.anthropic"},
156+
)
157+
)
158+
106159
def test_error(self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr):
107160
"""Ensure llmobs records are emitted for completion endpoints when configured and there is an error.
108161

0 commit comments

Comments
 (0)