fix(openai): safely tag potentially null chat completion message [backport 1.19] (#7080)

github-actions[bot] · Yun-Kim · ZStriker19 · web-flow · commit e4d4599bf55e · 2023-10-19T08:57:13.000Z
Backport a25570b from #7054 to 1.19. This fix ensures that we default to tagging an empty string when the message `content` field of OpenAI's ChatCompletion response is null. With the recent update to OpenAI's API that allows function-calling arguments to be passed to the ChatCompletion endpoint, the ChatCompletion response can now (if function calling is specified in the request) contain an additional `function_call` field that holds the LLM text response, in which case the original `content` field is `None`. Previously, the `content` field was guaranteed to contain a text value from the LLM; now, when we try to tag `None` directly onto the span, a `TypeError` is raised. With this fix, we check the value of the message content field and default to an empty string before tagging. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. 
- [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> Co-authored-by: Zachary Groves <32471391+ZStriker19@users.noreply.github.com>
diff --git a/ddtrace/contrib/openai/_endpoint_hooks.py b/ddtrace/contrib/openai/_endpoint_hooks.py
@@ -279,10 +279,8 @@ def _record_response(self, pin, integration, span, args, kwargs, resp, error):
             idx = choice["index"]
             span.set_tag_str("openai.response.choices.%d.finish_reason" % idx, choice.get("finish_reason"))
             if integration.is_pc_sampled_span(span) and choice.get("message"):
-                span.set_tag_str(
-                    "openai.response.choices.%d.message.content" % idx,
-                    integration.trunc(choice.get("message", {}).get("content", "")),
-                )
+                content = choice.get("message", {}).get("content", "") or ""
+                span.set_tag_str("openai.response.choices.%d.message.content" % idx, integration.trunc(content))
                 span.set_tag_str(
                     "openai.response.choices.%d.message.role" % idx,
                     integration.trunc(choice.get("message", {}).get("role", "")),
diff --git a/releasenotes/notes/fix-openai-chat-completion-empty-message-fdd7c70643cf366f.yaml b/releasenotes/notes/fix-openai-chat-completion-empty-message-fdd7c70643cf366f.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    openai: This fix resolves an issue where chat completion requests with function calls resulted in an error
+    when tagging null message content fields in the chat completion response.
diff --git a/tests/contrib/openai/cassettes/chat_completion_function_call.yaml b/tests/contrib/openai/cassettes/chat_completion_function_call.yaml
@@ -0,0 +1,97 @@
+interactions:
+- request:
+    body: '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "\n    David
+      Nguyen is a sophomore majoring in computer science at Stanford University and
+      has a GPA of 3.8.\n    David is an active member of the university''s Chess
+      Club and the South Asian Student Association.\n    He hopes to pursue a career
+      in software engineering after graduating.\n    "}], "functions": [{"name": "extract_student_info",
+      "description": "Get the student information from the body of the input text",
+      "parameters": {"type": "object", "properties": {"name": {"type": "string", "description":
+      "Name of the person"}, "major": {"type": "string", "description": "Major subject."},
+      "school": {"type": "string", "description": "The university name."}, "grades":
+      {"type": "integer", "description": "GPA of the student."}, "clubs": {"type":
+      "array", "description": "School clubs for extracurricular activities. ", "items":
+      {"type": "string", "description": "Name of School Club"}}}}}], "function_call":
+      "auto", "user": "ddtrace-test"}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '1014'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - OpenAI/v1 PythonBindings/0.27.2
+      X-OpenAI-Client-User-Agent:
+      - '{"bindings_version": "0.27.2", "httplib": "requests", "lang": "python", "lang_version":
+        "3.10.5", "platform": "macOS-13.6-arm64-arm-64bit", "publisher": "openai",
+        "uname": "Darwin 22.6.0 Darwin Kernel Version 22.6.0: Fri Sep 15 13:41:28
+        PDT 2023; root:xnu-8796.141.3.700.8~1/RELEASE_ARM64_T6000 arm64"}'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA1RSS2/iMBC+8ytGcwZESoE2t5bdK3tASCs1FTLOQFyccWSPu60Q/33lBEK5WNb3
+        8jx8GgCgKTEH1JUSXTd29DSdVdXqWL0+/3n9V69XR7/8y9ptYhl+b3CYHG73QVqurrF2dWNJjOOO
+        1p6UUErN5s+zxeNiPslaonYl2WQ7NDKajmcjiX7nRpN5Nr04K2c0BczhbQAAcGrPVCOX9IU5TIZX
+        pKYQ1IEw70UA6J1NCKoQTBDFgsMbqR0LcSqbo7U/iH1knarfamXtXSAAsqrbSPoSr7Rsg8SSWLaG
+        9+5HOgAqf4g1saTy8VQwQNG6C8yhwF/q05SwOsRv4gKHHV2rD+c7Pg0xCnkI2hBr6jVBV87ZTrQW
+        xXvnS9iw+SQfjHz3uoNXJYWkm46fLpi2cddCbwUuKwoBljbuChymLBelgpdgFMO6awpeQnDaqDSM
+        At8LPmPf4PlyO/cb2Bs2odp6UsFx6vl+joOr671dbbzbFjbe1Y1sxR2J08Cy2aILxttvurE9KU6U
+        veEP2eMgPXIe/AcAAP//AwB/irZaygIAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 80cd0f8a0c8241f5-EWR
+      Cache-Control:
+      - no-cache, must-revalidate
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 26 Sep 2023 17:00:01 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      access-control-allow-origin:
+      - '*'
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-model:
+      - gpt-3.5-turbo-0613
+      openai-organization:
+      - datadog-4
+      openai-processing-ms:
+      - '692'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=15724800; includeSubDomains
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '1000000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '999910'
+      x-ratelimit-reset-requests:
+      - 6ms
+      x-ratelimit-reset-tokens:
+      - 5ms
+      x-request-id:
+      - 3b5828ba018e216550b170f2c77f88f3
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/openai/test_openai.py b/tests/contrib/openai/test_openai.py
@@ -572,6 +572,46 @@ def test_chat_completion(api_key_in_env, request_api_key, openai, openai_vcr, sn
             )
 
 
+@pytest.mark.snapshot(ignores=["meta.http.useragent"])
+def test_chat_completion_function_calling(openai, openai_vcr, snapshot_tracer):
+    if not hasattr(openai, "ChatCompletion"):
+        pytest.skip("ChatCompletion not supported for this version of openai")
+    student_description = """
+    David Nguyen is a sophomore majoring in computer science at Stanford University and has a GPA of 3.8.
+    David is an active member of the university's Chess Club and the South Asian Student Association.
+    He hopes to pursue a career in software engineering after graduating.
+    """
+    student_custom_functions = [
+        {
+            "name": "extract_student_info",
+            "description": "Get the student information from the body of the input text",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "Name of the person"},
+                    "major": {"type": "string", "description": "Major subject."},
+                    "school": {"type": "string", "description": "The university name."},
+                    "grades": {"type": "integer", "description": "GPA of the student."},
+                    "clubs": {
+                        "type": "array",
+                        "description": "School clubs for extracurricular activities. ",
+                        "items": {"type": "string", "description": "Name of School Club"},
+                    },
+                },
+            },
+        },
+    ]
+
+    with openai_vcr.use_cassette("chat_completion_function_call.yaml"):
+        openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": student_description}],
+            functions=student_custom_functions,
+            function_call="auto",
+            user="ddtrace-test",
+        )
+
+
 @pytest.mark.parametrize("ddtrace_config_openai", [dict(metrics_enabled=b) for b in [True, False]])
 def test_enable_metrics(openai, openai_vcr, ddtrace_config_openai, mock_metrics, mock_tracer):
     """Ensure the metrics_enabled configuration works."""
diff --git a/tests/snapshots/tests.contrib.openai.test_openai.test_chat_completion_function_calling.json b/tests/snapshots/tests.contrib.openai.test_openai.test_chat_completion_function_calling.json
@@ -0,0 +1,49 @@
+[[
+  {
+    "name": "openai.request",
+    "service": null,
+    "resource": "createChatCompletion",
+    "trace_id": 0,
+    "span_id": 1,
+    "parent_id": 0,
+    "meta": {
+      "_dd.p.dm": "-0",
+      "component": "openai",
+      "language": "python",
+      "openai.api_base": "https://api.openai.com/v1",
+      "openai.api_type": "open_ai",
+      "openai.organization.name": "datadog-4",
+      "openai.request.endpoint": "/v1/chat/completions",
+      "openai.request.messages.0.content": "\\n    David Nguyen is a sophomore majoring in computer science at Stanford University and has a GPA of 3.8.\\n    David is an act...",
+      "openai.request.messages.0.name": "",
+      "openai.request.messages.0.role": "user",
+      "openai.request.method": "POST",
+      "openai.request.model": "gpt-3.5-turbo",
+      "openai.request.user": "ddtrace-test",
+      "openai.response.choices.0.finish_reason": "function_call",
+      "openai.response.choices.0.message.content": "",
+      "openai.response.choices.0.message.name": "",
+      "openai.response.choices.0.message.role": "assistant",
+      "openai.response.id": "chatcmpl-835hhNkhB9OBwmSNkrCXncoUudsEU",
+      "openai.response.model": "gpt-3.5-turbo-0613",
+      "openai.user.api_key": "sk-...key>",
+      "runtime-id": "2fab29c9652a4617a2afcf0ff42a3fa9"
+    },
+    "metrics": {
+      "_dd.measured": 1,
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sample_rate": 1.0,
+      "_sampling_priority_v1": 1,
+      "openai.organization.ratelimit.requests.remaining": 9999,
+      "openai.organization.ratelimit.tokens.remaining": 999910,
+      "openai.response.choices_count": 1,
+      "openai.response.created": 1695747601,
+      "openai.response.usage.completion_tokens": 57,
+      "openai.response.usage.prompt_tokens": 157,
+      "openai.response.usage.total_tokens": 214,
+      "process_id": 26590
+    },
+    "duration": 1024064000,
+    "start": 1695747600754577000
+  }]]

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +fixes:
 +  - |
 +    openai: This fix resolves an issue where chat completion requests with function calls resulted in an error
 +    when tagging null message content fields in the chat completion response.