
Commit bbba41c

Yun-Kim and ZStriker19 authored
fix(openai): add openai api key in individual request [backport #5846 to 1.13] (#5850)
Backports #5846 to 1.13. Fixes #5828.

This PR tags the OpenAI API key when it is set as part of an individual API request. Previously we assumed the key would always be set as an env var and therefore always be available from `openai.api_key`, but some users do not set the API key that way and instead pass it per request, which caused the integration to crash. This change fixes that issue and supports both use cases (a short sketch of the two patterns follows the checklists below).

## Checklist
- [x] Change(s) are motivated and described in the PR description.
- [x] Testing strategy is described if automated tests are not included in the PR.
- [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed.
- [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)).
- [x] OPTIONAL: PR description includes explicit acknowledgement of the performance implications of the change as reported in the benchmarks PR comment.

## Reviewer Checklist
- [ ] Title is accurate.
- [ ] No unnecessary changes are introduced.
- [ ] Description motivates each change.
- [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary.
- [ ] Testing strategy adequately addresses listed risk(s).
- [ ] Change is maintainable (easy to change, telemetry, documentation).
- [ ] Release note makes sense to a user of the library.
- [ ] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment.

Co-authored-by: Zachary Groves <[email protected]>
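For context, a minimal sketch of the two usage patterns the integration now handles; the key value and completion parameters are placeholders that mirror the tests in this PR:

```python
import openai

# Pattern 1: key set once on the module (previously the only case the
# integration handled; it is read back from openai.api_key for tagging).
openai.api_key = "sk-<your-key>"
openai.Completion.create(model="ada", prompt="Hello world")

# Pattern 2: key passed per request while openai.api_key stays unset (None).
# Before this fix, tagging "openai.user.api_key" crashed on the None key.
openai.Completion.create(api_key="sk-<your-key>", model="ada", prompt="Hello world")
```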
1 parent d191ded commit bbba41c

15 files changed (+368, -40 lines)

ddtrace/contrib/openai/patch.py

Lines changed: 10 additions & 2 deletions
@@ -4,6 +4,7 @@
 import time
 from typing import AsyncGenerator
 from typing import Generator
+from typing import Optional
 from typing import TYPE_CHECKING
 
 from ddtrace import config
@@ -76,15 +77,18 @@ def start_log_writer(self):
 
     @property
     def _user_api_key(self):
-        # type: () -> str
+        # type: () -> Optional[str]
         """Get a representation of the user API key for tagging."""
         # Match the API key representation that OpenAI uses in their UI.
+        if self._openai.api_key is None:
+            return
         return "sk-...%s" % self._openai.api_key[-4:]
 
     def set_base_span_tags(self, span):
         # type: (Span) -> None
         span.set_tag_str(COMPONENT, self._config.integration_name)
-        span.set_tag_str("openai.user.api_key", self._user_api_key)
+        if self._user_api_key is not None:
+            span.set_tag_str("openai.user.api_key", self._user_api_key)
 
     # Do these dynamically as openai users can set these at any point
     # not necessarily before patch() time.
@@ -292,6 +296,10 @@ def _patched_make_session(func, args, kwargs):
 
 def _traced_endpoint(endpoint_hook, integration, pin, args, kwargs):
     span = integration.trace(pin, args[0].OBJECT_NAME, kwargs.get("model"))
+    openai_api_key = kwargs.get("api_key")
+    if openai_api_key:
+        # API key can either be set on the import or per request
+        span.set_tag_str("openai.user.api_key", "sk-...%s" % openai_api_key[-4:])
     try:
         # Start the hook
         hook = endpoint_hook().handle_request(pin, integration, span, args, kwargs)
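For illustration, a minimal standalone sketch of the masking behavior applied before tagging; `mask_api_key` is a hypothetical helper, not part of the integration, and mirrors the `"sk-...%s" % key[-4:]` format used above:

```python
from typing import Optional


def mask_api_key(api_key):
    # type: (Optional[str]) -> Optional[str]
    # Keep only the last four characters, matching the representation OpenAI
    # shows in its UI; return None when no key is available so that no
    # "openai.user.api_key" tag gets set.
    if api_key is None:
        return None
    return "sk-...%s" % api_key[-4:]


assert mask_api_key("sk-abcdefgh1234wxyz") == "sk-...wxyz"
assert mask_api_key(None) is None
```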
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
OpenAI: Resolved an issue where OpenAI API keys set in individual requests rather than as an
5+
environment variable caused an error in the integration.
6+

tests/contrib/openai/test_openai.py

Lines changed: 48 additions & 14 deletions
@@ -48,6 +48,23 @@ def openai_vcr():
     yield get_openai_vcr()
 
 
+@pytest.fixture
+def api_key_in_env():
+    return True
+
+
+@pytest.fixture
+def request_api_key(api_key_in_env, openai_api_key):
+    """
+    OpenAI allows both using an env var or a specified param for the API key, so this fixture specifies the API key
+    (or None) to be used in the actual request param. If the API key is set as an env var, this should return None
+    to make sure the env var will be used.
+    """
+    if api_key_in_env:
+        return None
+    return openai_api_key
+
+
 @pytest.fixture
 def openai_api_key():
     return os.getenv("OPENAI_API_KEY", "<not-a-real-key>")
@@ -59,10 +76,11 @@ def openai_organization():
 
 
 @pytest.fixture
-def openai(openai_api_key, openai_organization):
+def openai(openai_api_key, openai_organization, api_key_in_env):
     import openai
 
-    openai.api_key = openai_api_key
+    if api_key_in_env:
+        openai.api_key = openai_api_key
     openai.organization = openai_organization
     yield openai
     # Since unpatching doesn't work (see the unpatch() function),
@@ -114,9 +132,10 @@ def ddtrace_config_openai():
 
 
 @pytest.fixture
-def patch_openai(ddtrace_config_openai, openai_api_key, openai_organization):
+def patch_openai(ddtrace_config_openai, openai_api_key, openai_organization, api_key_in_env):
     with override_config("openai", ddtrace_config_openai):
-        openai.api_key = openai_api_key
+        if api_key_in_env:
+            openai.api_key = openai_api_key
         openai.organization = openai_organization
         patch(openai=True)
         yield
@@ -198,10 +217,11 @@ def test_patching(openai):
 
 
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
-def test_completion(openai, openai_vcr, mock_metrics, snapshot_tracer):
+@pytest.mark.parametrize("api_key_in_env", [True, False])
+def test_completion(api_key_in_env, request_api_key, openai, openai_vcr, mock_metrics, snapshot_tracer):
     with openai_vcr.use_cassette("completion.yaml"):
         resp = openai.Completion.create(
-            model="ada", prompt="Hello world", temperature=0.8, n=2, stop=".", max_tokens=10
+            api_key=request_api_key, model="ada", prompt="Hello world", temperature=0.8, n=2, stop=".", max_tokens=10
         )
 
     assert resp["object"] == "text_completion"
@@ -271,10 +291,18 @@ def test_completion(openai, openai_vcr, mock_metrics, snapshot_tracer):
 
 @pytest.mark.asyncio
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
-async def test_acompletion(openai, openai_vcr, mock_metrics, mock_logs, snapshot_tracer):
+@pytest.mark.parametrize("api_key_in_env", [True, False])
+async def test_acompletion(
+    api_key_in_env, request_api_key, openai, openai_vcr, mock_metrics, mock_logs, snapshot_tracer
+):
     with openai_vcr.use_cassette("completion_async.yaml"):
         resp = await openai.Completion.acreate(
-            model="curie", prompt="As Descartes said, I think, therefore", temperature=0.8, n=1, max_tokens=150
+            api_key=request_api_key,
+            model="curie",
+            prompt="As Descartes said, I think, therefore",
+            temperature=0.8,
+            n=1,
+            max_tokens=150,
         )
     assert resp["object"] == "text_completion"
     assert resp["choices"] == [
@@ -457,12 +485,14 @@ def test_global_tags(openai_vcr, ddtrace_config_openai, openai, mock_metrics, mo
 
 
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
-def test_chat_completion(openai, openai_vcr, snapshot_tracer):
+@pytest.mark.parametrize("api_key_in_env", [True, False])
+def test_chat_completion(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
     if not hasattr(openai, "ChatCompletion"):
         pytest.skip("ChatCompletion not supported for this version of openai")
 
     with openai_vcr.use_cassette("chat_completion.yaml"):
         openai.ChatCompletion.create(
+            api_key=request_api_key,
             model="gpt-3.5-turbo",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant."},
@@ -488,11 +518,13 @@ def test_enable_metrics(openai, openai_vcr, ddtrace_config_openai, mock_metrics,
 
 @pytest.mark.asyncio
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
-async def test_achat_completion(openai, openai_vcr, snapshot_tracer):
+@pytest.mark.parametrize("api_key_in_env", [True, False])
+async def test_achat_completion(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
     if not hasattr(openai, "ChatCompletion"):
         pytest.skip("ChatCompletion not supported for this version of openai")
     with openai_vcr.use_cassette("chat_completion_async.yaml"):
         await openai.ChatCompletion.acreate(
+            api_key=request_api_key,
             model="gpt-3.5-turbo",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant."},
@@ -506,20 +538,22 @@ async def test_achat_completion(openai, openai_vcr, snapshot_tracer):
 
 
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
-def test_embedding(openai, openai_vcr, snapshot_tracer):
+@pytest.mark.parametrize("api_key_in_env", [True, False])
+def test_embedding(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
     if not hasattr(openai, "Embedding"):
         pytest.skip("embedding not supported for this version of openai")
     with openai_vcr.use_cassette("embedding.yaml"):
-        openai.Embedding.create(input="hello world", model="text-embedding-ada-002")
+        openai.Embedding.create(api_key=request_api_key, input="hello world", model="text-embedding-ada-002")
 
 
 @pytest.mark.asyncio
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
-async def test_aembedding(openai, openai_vcr, snapshot_tracer):
+@pytest.mark.parametrize("api_key_in_env", [True, False])
+async def test_aembedding(api_key_in_env, request_api_key, openai, openai_vcr, snapshot_tracer):
     if not hasattr(openai, "Embedding"):
         pytest.skip("embedding not supported for this version of openai")
     with openai_vcr.use_cassette("embedding_async.yaml"):
-        await openai.Embedding.acreate(input="hello world", model="text-embedding-ada-002")
+        await openai.Embedding.acreate(api_key=request_api_key, input="hello world", model="text-embedding-ada-002")
 
 
 @pytest.mark.snapshot(ignores=["meta.http.useragent"])
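The new tests rely on `@pytest.mark.parametrize` taking precedence over a fixture of the same name, so `api_key_in_env` stays True for unparametrized tests and is forced to both values where parametrized. A standalone sketch of that mechanism (test and fixture bodies here are illustrative, not copied from the PR):

```python
import pytest


@pytest.fixture
def api_key_in_env():
    # Default used by tests that are not parametrized on this name.
    return True


@pytest.fixture
def request_api_key(api_key_in_env):
    # Dependent fixtures see the parametrized value, not the default.
    return None if api_key_in_env else "sk-example"


@pytest.mark.parametrize("api_key_in_env", [True, False])
def test_key_source(api_key_in_env, request_api_key):
    # Runs twice: once simulating an env-var key (request_api_key is None),
    # once simulating a per-request key.
    assert (request_api_key is None) == api_key_in_env
```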

tests/snapshots/tests.contrib.openai.test_openai.test_achat_completion.json renamed to tests/snapshots/tests.contrib.openai.test_openai.test_achat_completion[False].json

Lines changed: 4 additions & 4 deletions
@@ -33,7 +33,7 @@
       "openai.response.choices.1.message.role": "assistant",
       "openai.response.object": "chat.completion",
       "openai.user.api_key": "sk-...key>",
-      "runtime-id": "8154bd1813ea422fa959cacf3fdfa1bc"
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
     },
     "metrics": {
       "_dd.agent_psr": 1.0,
@@ -46,8 +46,8 @@
       "openai.response.usage.completion_tokens": 34,
       "openai.response.usage.prompt_tokens": 57,
       "openai.response.usage.total_tokens": 91,
-      "process_id": 14806
+      "process_id": 83462
     },
-    "duration": 1189000,
-    "start": 1683148556484839000
+    "duration": 1216000,
+    "start": 1683752002975282000
   }]]
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+[[
+  {
+    "name": "openai.request",
+    "service": "",
+    "resource": "chat.completions/gpt-3.5-turbo",
+    "trace_id": 0,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.p.dm": "-0",
+      "api_base": "https://api.openai.com/v1",
+      "component": "openai",
+      "language": "python",
+      "openai.endpoint": "chat.completions",
+      "openai.model": "gpt-3.5-turbo",
+      "openai.organization.name": "datadog-4",
+      "openai.organization.ratelimit.requests.remaining": "3499",
+      "openai.request.messages.0.content": "You are a helpful assistant.",
+      "openai.request.messages.0.role": "system",
+      "openai.request.messages.1.content": "Who won the world series in 2020?",
+      "openai.request.messages.1.role": "user",
+      "openai.request.messages.2.content": "The Los Angeles Dodgers won the World Series in 2020.",
+      "openai.request.messages.2.role": "assistant",
+      "openai.request.messages.3.content": "Where was it played?",
+      "openai.request.messages.3.role": "user",
+      "openai.response.choices.0.finish_reason": "stop",
+      "openai.response.choices.0.message.content": "The 2020 World Series was played at Globe Life Field in Arlington, Texas.",
+      "openai.response.choices.0.message.role": "assistant",
+      "openai.response.choices.1.finish_reason": "stop",
+      "openai.response.choices.1.message.content": "The 2020 World Series was played in Globe Life Field in Arlington, Texas.",
+      "openai.response.choices.1.message.role": "assistant",
+      "openai.response.object": "chat.completion",
+      "openai.user.api_key": "sk-...key>",
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
+    },
+    "metrics": {
+      "_dd.agent_psr": 1.0,
+      "_dd.measured": 1,
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": 1,
+      "openai.request.n": 2,
+      "openai.request.top_p": 0.9,
+      "openai.response.usage.completion_tokens": 34,
+      "openai.response.usage.prompt_tokens": 57,
+      "openai.response.usage.total_tokens": 91,
+      "process_id": 83462
+    },
+    "duration": 1193000,
+    "start": 1683752002947578000
+  }]]

tests/snapshots/tests.contrib.openai.test_openai.test_acompletion.json renamed to tests/snapshots/tests.contrib.openai.test_openai.test_acompletion[False].json

Lines changed: 4 additions & 4 deletions
@@ -24,7 +24,7 @@
       "openai.response.choices.0.text": " I am; and I am in a sense a non-human entity woven together from memories, desires and emotions. But, who is to say that I am n...",
       "openai.response.object": "text_completion",
       "openai.user.api_key": "sk-...key>",
-      "runtime-id": "8154bd1813ea422fa959cacf3fdfa1bc"
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
     },
     "metrics": {
       "_dd.agent_psr": 1.0,
@@ -39,8 +39,8 @@
       "openai.response.usage.completion_tokens": 150,
       "openai.response.usage.prompt_tokens": 10,
       "openai.response.usage.total_tokens": 160,
-      "process_id": 14806
+      "process_id": 83462
     },
-    "duration": 1166000,
-    "start": 1683148556287285000
+    "duration": 1173000,
+    "start": 1683752002788693000
   }]]
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+[[
+  {
+    "name": "openai.request",
+    "service": "",
+    "resource": "completions/curie",
+    "trace_id": 0,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.p.dm": "-0",
+      "api_base": "https://api.openai.com/v1",
+      "component": "openai",
+      "language": "python",
+      "openai.endpoint": "completions",
+      "openai.model": "curie",
+      "openai.organization.name": "datadog-4",
+      "openai.organization.ratelimit.requests.remaining": "2999",
+      "openai.organization.ratelimit.tokens.remaining": "249850",
+      "openai.request.prompt": "As Descartes said, I think, therefore",
+      "openai.response.choices.0.finish_reason": "length",
+      "openai.response.choices.0.logprobs": "returned",
+      "openai.response.choices.0.text": " I am; and I am in a sense a non-human entity woven together from memories, desires and emotions. But, who is to say that I am n...",
+      "openai.response.object": "text_completion",
+      "openai.user.api_key": "sk-...key>",
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
+    },
+    "metrics": {
+      "_dd.agent_psr": 1.0,
+      "_dd.measured": 1,
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": 1,
+      "openai.request.max_tokens": 150,
+      "openai.request.n": 1,
+      "openai.request.temperature": 0.8,
+      "openai.response.choices.num": 1,
+      "openai.response.usage.completion_tokens": 150,
+      "openai.response.usage.prompt_tokens": 10,
+      "openai.response.usage.total_tokens": 160,
+      "process_id": 83462
+    },
+    "duration": 1357000,
+    "start": 1683752002765150000
+  }]]

tests/snapshots/tests.contrib.openai.test_openai.test_aembedding.json renamed to tests/snapshots/tests.contrib.openai.test_openai.test_aembedding[False].json

Lines changed: 4 additions & 4 deletions
@@ -20,7 +20,7 @@
       "openai.request.input": "hello world",
       "openai.request.model": "text-embedding-ada-002",
       "openai.user.api_key": "sk-...key>",
-      "runtime-id": "8154bd1813ea422fa959cacf3fdfa1bc"
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
     },
     "metrics": {
       "_dd.agent_psr": 1.0,
@@ -32,8 +32,8 @@
       "openai.response.data.num-embeddings": 1,
       "openai.response.usage.prompt_tokens": 2,
       "openai.response.usage.total_tokens": 2,
-      "process_id": 14806
+      "process_id": 83462
     },
-    "duration": 1309000,
-    "start": 1683148556521424000
+    "duration": 1150000,
+    "start": 1683752003073992000
   }]]

tests/snapshots/tests.contrib.openai.test_openai.test_embedding.json renamed to tests/snapshots/tests.contrib.openai.test_openai.test_aembedding[True].json

Lines changed: 4 additions & 4 deletions
@@ -20,7 +20,7 @@
       "openai.request.input": "hello world",
       "openai.request.model": "text-embedding-ada-002",
       "openai.user.api_key": "sk-...key>",
-      "runtime-id": "8154bd1813ea422fa959cacf3fdfa1bc"
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
     },
     "metrics": {
       "_dd.agent_psr": 1.0,
@@ -32,8 +32,8 @@
       "openai.response.data.num-embeddings": 1,
       "openai.response.usage.prompt_tokens": 2,
       "openai.response.usage.total_tokens": 2,
-      "process_id": 14806
+      "process_id": 83462
     },
-    "duration": 2550000,
-    "start": 1683148556502335000
+    "duration": 1158000,
+    "start": 1683752003050750000
   }]]

tests/snapshots/tests.contrib.openai.test_openai.test_chat_completion.json renamed to tests/snapshots/tests.contrib.openai.test_openai.test_chat_completion[False].json

Lines changed: 4 additions & 4 deletions
@@ -33,7 +33,7 @@
       "openai.response.choices.1.message.role": "assistant",
       "openai.response.object": "chat.completion",
       "openai.user.api_key": "sk-...key>",
-      "runtime-id": "8154bd1813ea422fa959cacf3fdfa1bc"
+      "runtime-id": "89c1a536548b4fbe8ba0d2fbc57a519b"
     },
     "metrics": {
       "_dd.agent_psr": 1.0,
@@ -46,8 +46,8 @@
       "openai.response.usage.completion_tokens": 34,
       "openai.response.usage.prompt_tokens": 57,
       "openai.response.usage.total_tokens": 91,
-      "process_id": 14806
+      "process_id": 83462
     },
-    "duration": 2540000,
-    "start": 1683148556436719000
+    "duration": 2907000,
+    "start": 1683752002886173000
   }]]
