Skip to content

Commit 30b7e35

Browse files
fix(llmobs): support multiple system prompts for anthropic [backport 3.3] (#12964)
Backport 4c46271 from #12958 to 3.3. Previously, we assumed the system prompt was just a string. However, it can actually be a list of content blocks (similar to the rest of the Anthropic messages). In this case, the input was being dropped since we were setting `content` to be the list of messages. To fix this, we prepend the system prompt to the list of messages and have it undergo the same processing logic that we use for the rest of the messages. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: lievan <[email protected]>
1 parent f213b0f commit 30b7e35

File tree

4 files changed

+164
-2
lines changed

4 files changed

+164
-2
lines changed

ddtrace/llmobs/_integrations/anthropic.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Iterable
55
from typing import List
66
from typing import Optional
7+
from typing import Union
78
from urllib.parse import urlparse
89

910
from ddtrace.internal.logger import get_logger
@@ -81,7 +82,7 @@ def _llmobs_set_tags(
8182
}
8283
)
8384

84-
def _extract_input_message(self, messages, system_prompt=None):
85+
def _extract_input_message(self, messages, system_prompt: Optional[Union[str, List[Dict[str, Any]]]] = None):
8586
"""Extract input messages from the stored prompt.
8687
Anthropic allows for messages and multiple texts in a message, which requires some special casing.
8788
"""
@@ -90,7 +91,8 @@ def _extract_input_message(self, messages, system_prompt=None):
9091

9192
input_messages = []
9293
if system_prompt is not None:
93-
input_messages.append({"content": system_prompt, "role": "system"})
94+
messages = [{"content": system_prompt, "role": "system"}] + messages
95+
9496
for message in messages:
9597
if not isinstance(message, dict):
9698
log.warning("Anthropic message input must be a list of message param dicts.")
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
LLM Observability: This fix resolves an issue with Anthropic LLM spans where multiple system prompts caused input messages to be dropped.
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
interactions:
2+
- request:
3+
body: '{"max_tokens":15,"messages":[{"role":"user","content":[{"type":"text","text":"Hello,
4+
I am looking for information about some books!"},{"type":"text","text":"What
5+
is the best selling book?"}]}],"model":"claude-3-opus-20240229","system":[{"type":"text","text":"You
6+
are an AI assistant tasked with analyzing literary works."},{"type":"text","text":"only
7+
respond in all caps","cache_control":{"type":"ephemeral"}}],"temperature":0.8}'
8+
headers:
9+
accept:
10+
- application/json
11+
accept-encoding:
12+
- gzip, deflate
13+
anthropic-version:
14+
- '2023-06-01'
15+
connection:
16+
- keep-alive
17+
content-length:
18+
- '429'
19+
content-type:
20+
- application/json
21+
host:
22+
- api.anthropic.com
23+
user-agent:
24+
- Anthropic/Python 0.40.0
25+
x-stainless-arch:
26+
- arm64
27+
x-stainless-async:
28+
- 'false'
29+
x-stainless-lang:
30+
- python
31+
x-stainless-os:
32+
- MacOS
33+
x-stainless-package-version:
34+
- 0.40.0
35+
x-stainless-retry-count:
36+
- '0'
37+
x-stainless-runtime:
38+
- CPython
39+
x-stainless-runtime-version:
40+
- 3.10.13
41+
method: POST
42+
uri: https://api.anthropic.com/v1/messages
43+
response:
44+
body:
45+
string: !!binary |
46+
H4sIAAAAAAAAA2SOXWuDQBBF/4qd5xXUpA/dt2DMhyQV1BRKKbJZJ6nV7Fp3tqSI/70YKrT0aeCe
47+
c4fbQ1UCh4s5F56/X6/Es3+K4zrHU71/X9LxMxbAgL5aHC00RpwRGHS6GQNhTGVIKAIGF11iAxxk
48+
I2yJ7szVrTVu4AVzLwgegIHUilAR8Jd+ekh4Hau3w2ET7XaJk2+iNLpzFmGYpMvt49rJE+dpkW6T
49+
Q+ZkySENo4yNEgyvDAzptuhQGK3GfeJakK5RGfhBBj8sKonAlW0aBva2n/dQqdbSJPP5jIEU8g0L
50+
2aGgSqvir+BNvENR/mfa0u/Evx+GbwAAAP//AwDHLVn/WQEAAA==
51+
headers:
52+
CF-RAY:
53+
- 927953253c178d1b-BOS
54+
Connection:
55+
- keep-alive
56+
Content-Encoding:
57+
- gzip
58+
Content-Type:
59+
- application/json
60+
Date:
61+
- Fri, 28 Mar 2025 18:52:29 GMT
62+
Server:
63+
- cloudflare
64+
Transfer-Encoding:
65+
- chunked
66+
X-Robots-Tag:
67+
- none
68+
anthropic-organization-id:
69+
- 4257e925-ee99-4ee8-9c62-8e53716d5203
70+
anthropic-ratelimit-input-tokens-limit:
71+
- '400000'
72+
anthropic-ratelimit-input-tokens-remaining:
73+
- '400000'
74+
anthropic-ratelimit-input-tokens-reset:
75+
- '2025-03-28T18:52:28Z'
76+
anthropic-ratelimit-output-tokens-limit:
77+
- '80000'
78+
anthropic-ratelimit-output-tokens-remaining:
79+
- '80000'
80+
anthropic-ratelimit-output-tokens-reset:
81+
- '2025-03-28T18:52:28Z'
82+
anthropic-ratelimit-requests-limit:
83+
- '4000'
84+
anthropic-ratelimit-requests-remaining:
85+
- '3999'
86+
anthropic-ratelimit-requests-reset:
87+
- '2025-03-28T18:52:28Z'
88+
anthropic-ratelimit-tokens-limit:
89+
- '480000'
90+
anthropic-ratelimit-tokens-remaining:
91+
- '480000'
92+
anthropic-ratelimit-tokens-reset:
93+
- '2025-03-28T18:52:28Z'
94+
cf-cache-status:
95+
- DYNAMIC
96+
request-id:
97+
- req_014iJLarFdJBNdJkwwRKsqGp
98+
via:
99+
- 1.1 google
100+
status:
101+
code: 200
102+
message: OK
103+
version: 1

tests/contrib/anthropic/test_anthropic_llmobs.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,59 @@ def test_completion(self, anthropic, ddtrace_global_config, mock_llmobs_writer,
103103
)
104104
)
105105

106+
def test_completion_with_multiple_system_prompts(
107+
self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr
108+
):
109+
"""Ensure llmobs records are emitted for completion endpoints with a list of messages as the system prompt.
110+
111+
Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation.
112+
"""
113+
llm = anthropic.Anthropic()
114+
with request_vcr.use_cassette("anthropic_completion_multi_system_prompt.yaml"):
115+
llm.messages.create(
116+
model="claude-3-opus-20240229",
117+
max_tokens=15,
118+
temperature=0.8,
119+
system=[
120+
{
121+
"type": "text",
122+
"text": "You are an AI assistant tasked with analyzing literary works.",
123+
},
124+
{"type": "text", "text": "only respond in all caps", "cache_control": {"type": "ephemeral"}},
125+
],
126+
messages=[
127+
{
128+
"role": "user",
129+
"content": [
130+
{"type": "text", "text": "Hello, I am looking for information about some books!"},
131+
{"type": "text", "text": "What is the best selling book?"},
132+
],
133+
}
134+
],
135+
)
136+
span = mock_tracer.pop_traces()[0][0]
137+
assert mock_llmobs_writer.enqueue.call_count == 1
138+
mock_llmobs_writer.enqueue.assert_called_with(
139+
_expected_llmobs_llm_span_event(
140+
span,
141+
model_name="claude-3-opus-20240229",
142+
model_provider="anthropic",
143+
input_messages=[
144+
{
145+
"content": "You are an AI assistant tasked with analyzing literary works.",
146+
"role": "system",
147+
},
148+
{"content": "only respond in all caps", "role": "system"},
149+
{"content": "Hello, I am looking for information about some books!", "role": "user"},
150+
{"content": "What is the best selling book?", "role": "user"},
151+
],
152+
output_messages=[{"content": "HELLO THERE! ACCORDING TO VARIOUS SOURCES, THE", "role": "assistant"}],
153+
metadata={"temperature": 0.8, "max_tokens": 15.0},
154+
token_metrics={"input_tokens": 43, "output_tokens": 15, "total_tokens": 58},
155+
tags={"ml_app": "<ml-app-name>", "service": "tests.contrib.anthropic"},
156+
)
157+
)
158+
106159
def test_error(self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr):
107160
"""Ensure llmobs records are emitted for completion endpoints when configured and there is an error.
108161

0 commit comments

Comments
 (0)