Skip to content

Commit 6036aa9

Browse files
authored
chore(openai): transition to allowlist for openai llmobs span metadata (#14100)
Using an allow list instead of the current deny list reduces the risk of potentially sensitive data being tagged in LLM Obs spans. This PR updates the Open AI LLM Obs integration to use an allow list when tagging metadata using request kwargs. There is a separate allow list for each of the Open AI endpoints based on the Open AI API reference: - [completions](https://platform.openai.com/docs/api-reference/completions/create) - [chat](https://platform.openai.com/docs/api-reference/chat/create) - [responses](https://platform.openai.com/docs/api-reference/responses/create) I included every kwarg except those that were in the old deny list or `metadata` which could contain information that we cannot verify is safe to tag on our spans. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a 
manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent f2a5f70 commit 6036aa9

File tree

1 file changed

+63
-29
lines changed

1 file changed

+63
-29
lines changed

ddtrace/llmobs/_integrations/utils.py

Lines changed: 63 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from ddtrace.llmobs._constants import INPUT_MESSAGES
2020
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
2121
from ddtrace.llmobs._constants import INPUT_VALUE
22-
from ddtrace.llmobs._constants import LITELLM_ROUTER_INSTANCE_KEY
2322
from ddtrace.llmobs._constants import METADATA
2423
from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG
2524
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
@@ -39,31 +38,71 @@
3938

4039
logger = get_logger(__name__)
4140

42-
OPENAI_SKIPPED_COMPLETION_TAGS = (
43-
"model",
44-
"prompt",
45-
"api_key",
46-
"user_api_key",
47-
"user_api_key_hash",
48-
LITELLM_ROUTER_INSTANCE_KEY,
41+
COMMON_METADATA_KEYS = (
42+
"stream",
43+
"temperature",
44+
"top_p",
45+
"user",
4946
)
50-
OPENAI_SKIPPED_CHAT_TAGS = (
51-
"model",
52-
"messages",
47+
OPENAI_METADATA_RESPONSE_KEYS = (
48+
"background",
49+
"include",
50+
"max_output_tokens",
51+
"max_tool_calls",
52+
"parallel_tool_calls",
53+
"previous_response_id",
54+
"prompt",
55+
"reasoning",
56+
"service_tier",
57+
"store",
58+
"text",
59+
"tool_choice",
5360
"tools",
54-
"functions",
55-
"api_key",
56-
"user_api_key",
57-
"user_api_key_hash",
58-
LITELLM_ROUTER_INSTANCE_KEY,
61+
"top_logprobs",
62+
"truncation",
63+
)
64+
OPENAI_METADATA_CHAT_KEYS = (
65+
"audio",
66+
"frequency_penalty",
67+
"function_call",
68+
"logit_bias",
69+
"logprobs",
70+
"max_completion_tokens",
71+
"max_tokens",
72+
"modalities",
73+
"n",
74+
"parallel_tool_calls",
75+
"prediction",
76+
"presence_penalty",
77+
"reasoning_effort",
78+
"response_format",
79+
"seed",
80+
"service_tier",
81+
"stop",
82+
"store",
83+
"stream_options",
84+
"tool_choice",
85+
"top_logprobs",
86+
"web_search_options",
87+
)
88+
OPENAI_METADATA_COMPLETION_KEYS = (
89+
"best_of",
90+
"echo",
91+
"frequency_penalty",
92+
"logit_bias",
93+
"logprobs",
94+
"max_tokens",
95+
"n",
96+
"presence_penalty",
97+
"seed",
98+
"stop",
99+
"stream_options",
100+
"suffix",
59101
)
60102

61103
LITELLM_METADATA_CHAT_KEYS = (
62104
"timeout",
63-
"temperature",
64-
"top_p",
65105
"n",
66-
"stream",
67106
"stream_options",
68107
"stop",
69108
"max_completion_tokens",
@@ -73,7 +112,6 @@
73112
"presence_penalty",
74113
"frequency_penalty",
75114
"logit_bias",
76-
"user",
77115
"response_format",
78116
"seed",
79117
"tool_choice",
@@ -97,12 +135,8 @@
97135
"n",
98136
"presence_penalty",
99137
"stop",
100-
"stream",
101138
"stream_options",
102139
"suffix",
103-
"temperature",
104-
"top_p",
105-
"user",
106140
"api_base",
107141
"api_version",
108142
"model_list",
@@ -471,12 +505,12 @@ def get_metadata_from_kwargs(
471505
kwargs: Dict[str, Any], integration_name: str = "openai", operation: str = "chat"
472506
) -> Dict[str, Any]:
473507
metadata = {}
508+
keys_to_include: Tuple[str, ...] = COMMON_METADATA_KEYS
474509
if integration_name == "openai":
475-
keys_to_skip = OPENAI_SKIPPED_CHAT_TAGS if operation == "chat" else OPENAI_SKIPPED_COMPLETION_TAGS
476-
metadata = {k: v for k, v in kwargs.items() if k not in keys_to_skip}
510+
keys_to_include += OPENAI_METADATA_CHAT_KEYS if operation == "chat" else OPENAI_METADATA_COMPLETION_KEYS
477511
elif integration_name == "litellm":
478-
keys_to_include = LITELLM_METADATA_CHAT_KEYS if operation == "chat" else LITELLM_METADATA_COMPLETION_KEYS
479-
metadata = {k: v for k, v in kwargs.items() if k in keys_to_include}
512+
keys_to_include += LITELLM_METADATA_CHAT_KEYS if operation == "chat" else LITELLM_METADATA_COMPLETION_KEYS
513+
metadata = {k: v for k, v in kwargs.items() if k in keys_to_include}
480514
return metadata
481515

482516

@@ -621,7 +655,7 @@ def openai_get_metadata_from_response(
621655
metadata = {}
622656

623657
if kwargs:
624-
metadata.update({k: v for k, v in kwargs.items() if k not in ("model", "input", "instructions")})
658+
metadata.update({k: v for k, v in kwargs.items() if k in OPENAI_METADATA_RESPONSE_KEYS + COMMON_METADATA_KEYS})
625659

626660
if not response:
627661
return metadata

0 commit comments

Comments (0)