Commit 3e7904f

openai-v2: handle with_raw_response streaming (#4033)

* openai-v2: handle with_raw_response streaming
* Add changelog
* Add missing vcr recording

1 parent d0d895d commit 3e7904f
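For context, a minimal sketch of the call pattern this commit fixes, reconstructed from the tests added below. The client setup here is an assumption for illustration; the crash only occurred while the openai-v2 instrumentation was active (e.g. after OpenAIInstrumentor().instrument()).

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set and instrumentation is active

raw_response = client.chat.completions.with_raw_response.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say this is a test"}],
    stream=True,
)
stream = raw_response.parse()  # crashed here under instrumentation before this fix
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")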

File tree

6 files changed (+261, -1 lines)


instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#4017](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4017))
 - Add support for chat completions choice count and stop sequences span attributes
   ([#4028](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4028))
+- Fix crash with streaming `with_raw_response`
+  ([#4033](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4033))
 
 ## Version 2.2b0 (2025-11-25)
 

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py

Lines changed: 4 additions & 0 deletions
@@ -701,3 +701,7 @@ def process_chunk(self, chunk):
         self.set_response_service_tier(chunk)
         self.build_streaming_response(chunk)
         self.set_usage(chunk)
+
+    def parse(self):
+        """Called when using with_raw_response with stream=True"""
+        return self
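A plausible reading of the fix, consistent with the docstring and the tests below: the instrumentation's patched create() wraps a streaming result in its own stream wrapper, but with_raw_response callers then invoke .parse() on whatever create() returned in order to obtain the stream. The wrapper had no parse() method, so that call crashed. Returning self keeps the wrapper, and therefore the chunk bookkeeping in process_chunk(), in the iteration path. A minimal sketch of the duck-typing pattern, with hypothetical names (this is not the instrumentation's actual class):

class RecordingStreamWrapper:  # hypothetical name, for illustration only
    """Wraps a chunk stream; iterates like the stream and, like a raw
    response, answers parse() by handing back the (wrapped) stream."""

    def __init__(self, stream, on_chunk):
        self._stream = stream      # object returned by the patched create()
        self._on_chunk = on_chunk  # callback, e.g. records usage on a span

    def __iter__(self):
        for chunk in self._stream:
            self._on_chunk(chunk)  # observe each chunk before yielding it
            yield chunk

    def parse(self):
        # with_raw_response callers do raw.parse() to get the stream;
        # returning self keeps instrumentation in the loop, as in the fix above.
        return self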

instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_with_raw_repsonse.yaml renamed to instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_with_raw_response.yaml (fixes a typo in the cassette filename)

File renamed without changes.
instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/… (new file)

Lines changed: 143 additions & 0 deletions

@@ -0,0 +1,143 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "Say this is a test"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "stream": true,
+        "stream_options": {
+          "include_usage": true
+        }
+      }
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '148'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 1.109.1
+      X-Stainless-Arch:
+      - x64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - Linux
+      X-Stainless-Package-Version:
+      - 1.109.1
+      X-Stainless-Raw-Response:
+      - 'true'
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.12.12
+      authorization:
+      - Bearer test_openai_api_key
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: |+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"v3JkrR4kf"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":"This"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"c2Yj6Tq"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" is"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"vYP94Gjb"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" a"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"axQhTg4rR"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" test"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Sd4wYC"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZufRh78gtk"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Si8PiPK"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DtALgOW"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xYwawpnRk"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Swpx"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"L6Pd0pV"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Viytd"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"LqmsdvgjP8"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"vbpp9"}
+
+        data: {"id":"chatcmpl-CnMM0oFYQitzT43PYAvCrmNt6GIKs","object":"chat.completion.chunk","created":1765880036,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_644f11dd4d","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":12,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"xEbQODa0Ga"}
+
+        data: [DONE]
+
+    headers:
+      CF-RAY:
+      - 9aed6932dfb8ed9e-MXP
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Tue, 16 Dec 2025 10:13:56 GMT
+      Server:
+      - cloudflare
+      Set-Cookie: test_set_cookie
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization: test_openai_org_id
+      openai-processing-ms:
+      - '228'
+      openai-project:
+      - proj_Pf1eM5R55Z35wBy4rt8PxAGq
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '241'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '10000000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '9999993'
+      x-ratelimit-reset-requests:
+      - 6ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_279d1848f0cf450dbffc9d7776f157f7
+    status:
+      code: 200
+      message: OK
+version: 1
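Note on the recording: because the request sets "stream_options": {"include_usage": true}, the final data: chunk before [DONE] carries an empty choices list and only the token usage totals. That is why the tests below guard content access with `if chunk.choices:` and read the totals via `getattr(chunk, "usage", None)`.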

instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py

Lines changed: 57 additions & 0 deletions
@@ -299,6 +299,63 @@ async def test_async_chat_completion_with_raw_repsonse(
     assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])
 
 
+@pytest.mark.vcr()
+@pytest.mark.asyncio()
+async def test_chat_completion_with_raw_response_streaming(
+    span_exporter, log_exporter, async_openai_client, instrument_with_content
+):
+    llm_model_value = "gpt-4o-mini"
+    messages_value = [{"role": "user", "content": "Say this is a test"}]
+    raw_response = (
+        await async_openai_client.chat.completions.with_raw_response.create(
+            messages=messages_value,
+            model=llm_model_value,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+    )
+    response = raw_response.parse()
+
+    message_content = ""
+    async for chunk in response:
+        if chunk.choices:
+            message_content += chunk.choices[0].delta.content or ""
+        # get the last chunk
+        if getattr(chunk, "usage", None):
+            response_stream_usage = chunk.usage
+            response_stream_model = chunk.model
+            response_stream_id = chunk.id
+
+    spans = span_exporter.get_finished_spans()
+    assert_all_attributes(
+        spans[0],
+        llm_model_value,
+        response_stream_id,
+        response_stream_model,
+        response_stream_usage.prompt_tokens,
+        response_stream_usage.completion_tokens,
+        response_service_tier="default",
+    )
+
+    logs = log_exporter.get_finished_logs()
+    assert len(logs) == 2
+
+    user_message = {"content": messages_value[0]["content"]}
+    assert_message_in_logs(
+        logs[0], "gen_ai.user.message", user_message, spans[0]
+    )
+
+    choice_event = {
+        "index": 0,
+        "finish_reason": "stop",
+        "message": {
+            "role": "assistant",
+            "content": message_content,
+        },
+    }
+    assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])
+
+
 @pytest.mark.vcr()
 @pytest.mark.asyncio()
 async def test_async_chat_completion_tool_calls_with_content(

instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py

Lines changed: 55 additions & 1 deletion
@@ -412,7 +412,7 @@ def test_chat_completion_multiple_choices(
 
 
 @pytest.mark.vcr()
-def test_chat_completion_with_raw_repsonse(
+def test_chat_completion_with_raw_response(
     span_exporter, log_exporter, openai_client, instrument_with_content
 ):
     llm_model_value = "gpt-4o-mini"
@@ -451,6 +451,60 @@ def test_chat_completion_with_raw_repsonse(
     assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])
 
 
+@pytest.mark.vcr()
+def test_chat_completion_with_raw_response_streaming(
+    span_exporter, log_exporter, openai_client, instrument_with_content
+):
+    llm_model_value = "gpt-4o-mini"
+    messages_value = [{"role": "user", "content": "Say this is a test"}]
+    raw_response = openai_client.chat.completions.with_raw_response.create(
+        messages=messages_value,
+        model=llm_model_value,
+        stream=True,
+        stream_options={"include_usage": True},
+    )
+    response = raw_response.parse()
+
+    message_content = ""
+    for chunk in response:
+        if chunk.choices:
+            message_content += chunk.choices[0].delta.content or ""
+        # get the last chunk
+        if getattr(chunk, "usage", None):
+            response_stream_usage = chunk.usage
+            response_stream_model = chunk.model
+            response_stream_id = chunk.id
+
+    spans = span_exporter.get_finished_spans()
+    assert_all_attributes(
+        spans[0],
+        llm_model_value,
+        response_stream_id,
+        response_stream_model,
+        response_stream_usage.prompt_tokens,
+        response_stream_usage.completion_tokens,
+        response_service_tier="default",
+    )
+
+    logs = log_exporter.get_finished_logs()
+    assert len(logs) == 2
+
+    user_message = {"content": messages_value[0]["content"]}
+    assert_message_in_logs(
+        logs[0], "gen_ai.user.message", user_message, spans[0]
+    )
+
+    choice_event = {
+        "index": 0,
+        "finish_reason": "stop",
+        "message": {
+            "role": "assistant",
+            "content": message_content,
+        },
+    }
+    assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])
+
+
 @pytest.mark.vcr()
 def test_chat_completion_tool_calls_with_content(
     span_exporter, log_exporter, openai_client, instrument_with_content
