Fix OpenAI streaming reasoning (#1232)

alexmojaki · web-flow · commit 938c2128fa2a · 2025-07-07T22:29:58.000+02:00
diff --git a/logfire/_internal/integrations/llm_providers/openai.py b/logfire/_internal/integrations/llm_providers/openai.py
@@ -133,7 +133,10 @@ def __init__(self):
             )
 
         def record_chunk(self, chunk: ChatCompletionChunk) -> None:
-            self._stream_state.handle_chunk(chunk)
+            try:
+                self._stream_state.handle_chunk(chunk)
+            except Exception:  # deliberately broad: any error raised while accumulating this chunk is ignored
+                pass  # NOTE(review): the error is dropped without logging; presumably so recording never breaks the caller's stream — confirm
 
         def get_response_data(self) -> Any:
             try:
@@ -142,10 +145,12 @@ def get_response_data(self) -> Any:
                 # AssertionError is raised when there is no completion snapshot
                 # Return empty content to show an empty Assistant response in the UI
                 return {'combined_chunk_content': '', 'chunk_count': 0}
-            return {
-                'message': final_completion.choices[0].message if final_completion.choices else None,
-                'usage': final_completion.usage,
-            }
+            if final_completion.choices:
+                message = final_completion.choices[0].message
+                message.role = 'assistant'
+            else:
+                message = None
+            return {'message': message, 'usage': final_completion.usage}
 except ImportError:  # pragma: no cover
     OpenaiChatCompletionStreamState = OpenaiCompletionStreamState  # type: ignore
 
diff --git a/tests/otel_integrations/cassettes/test_openai/test_openrouter_streaming_reasoning.yaml b/tests/otel_integrations/cassettes/test_openai/test_openrouter_streaming_reasoning.yaml
@@ -0,0 +1,213 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"Hello, how are you? (This is a trick
+      question)"}],"model":"google/gemini-2.5-flash","stream":true,"reasoning":{"effort":"low"}}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '166'
+      content-type:
+      - application/json
+      host:
+      - openrouter.ai
+      user-agent:
+      - OpenAI/Python 1.93.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.93.1
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.6
+    method: POST
+    uri: https://openrouter.ai/api/v1/chat/completions
+  response:
+    body:
+      string: ': OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Interpreting
+        User Intent**\n\nI''m zeroing in on the core of the query. The \"how are you\"
+        is basic, but the \"trick question\" label is key. My focus is on decoding
+        what the user *really* wants. I''m anticipating something beyond a simple
+        pleasantry.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Interpreting
+        User Intent**\n\nI''m zeroing in on the core of the query. The \"how are you\"
+        is basic, but the \"trick question\" label is key. My focus is on decoding
+        what the user *really* wants. I''m anticipating something beyond a simple
+        pleasantry.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Deconstructing
+        the \"Trick\"**\n\nI''ve analyzed the intent: the trick lies in the expectation
+        gap between human and AI responses. The raw data is processed. I need to politely
+        but directly address the inherent lack of feeling, then shift focus to my
+        capabilities. I''m now drafting refined responses, iterating for a balance
+        of humor, clarity, and user engagement.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Deconstructing
+        the \"Trick\"**\n\nI''ve analyzed the intent: the trick lies in the expectation
+        gap between human and AI responses. The raw data is processed. I need to politely
+        but directly address the inherent lack of feeling, then shift focus to my
+        capabilities. I''m now drafting refined responses, iterating for a balance
+        of humor, clarity, and user engagement.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        : OPENROUTER PROCESSING
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Formulating
+        the Perfect Response**\n\nThe response is evolving. I''ve created multiple
+        drafts, each aiming for the ideal blend of acknowledging the \"trick,\" explaining
+        my AI limitations, and offering assistance. I''m now focused on fine-tuning
+        the tone. I''m evaluating options for humor to enhance the user experience.
+        I''m experimenting with different phrasing to ensure the response is both
+        clever and clear, avoiding being overly verbose.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Formulating
+        the Perfect Response**\n\nThe response is evolving. I''ve created multiple
+        drafts, each aiming for the ideal blend of acknowledging the \"trick,\" explaining
+        my AI limitations, and offering assistance. I''m now focused on fine-tuning
+        the tone. I''m evaluating options for humor to enhance the user experience.
+        I''m experimenting with different phrasing to ensure the response is both
+        clever and clear, avoiding being overly verbose.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
+
+
+        : OPENROUTER PROCESSING
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Optimizing
+        Response Structure**\n\nI''m now focused on the final polishing stage. The
+        draft is finalized; it acknowledges the question''s nature, explains my AI
+        limitations, offers a functional answer, and re-engages the user. I''m tweaking
+        the flow, tightening the language, and optimizing the response''s overall
+        impact, aiming for maximum clarity and user satisfaction.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Optimizing
+        Response Structure**\n\nI''m now focused on the final polishing stage. The
+        draft is finalized; it acknowledges the question''s nature, explains my AI
+        limitations, offers a functional answer, and re-engages the user. I''m tweaking
+        the flow, tightening the language, and optimizing the response''s overall
+        impact, aiming for maximum clarity and user satisfaction.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"That''s
+        a clever way to put it! You''re right, it is a bit of a trick question for
+        an AI.\n\nAs a large language model, I don''t experience emotions, have a
+        physical body, or","reasoning":null,"reasoning_details":[]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"
+        \"feel\" things in the human sense, so I can''t really quantify \"how\" I
+        am.\n\nHowever, I am fully operational, my systems are running smoothly, and
+        I''m ready to assist you!\n\nSo, while I can''t genuinely answer it for myself,
+        how are *you* doing today","reasoning":null,"reasoning_details":[]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":",
+        and what can I help you with?","reasoning":null,"reasoning_details":[]},"finish_reason":"stop","native_finish_reason":"STOP","logprobs":null}]}
+
+
+        data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":13,"completion_tokens":1003,"total_tokens":1016}}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      Access-Control-Allow-Origin:
+      - '*'
+      CF-RAY:
+      - 95b9f5aaead53f1f-CPT
+      Cache-Control:
+      - no-cache
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream
+      Date:
+      - Mon, 07 Jul 2025 20:06:03 GMT
+      Permissions-Policy:
+      - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
+        "https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
+      Referrer-Policy:
+      - no-referrer, strict-origin-when-cross-origin
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Accept-Encoding
+      X-Content-Type-Options:
+      - nosniff
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/otel_integrations/test_openai.py b/tests/otel_integrations/test_openai.py
@@ -2149,3 +2149,149 @@ def test_responses_api_nonrecording(exporter: TestExporter, config_kwargs: dict[
     assert response.output_text == snapshot('Hello! How can I help you today? 😊')
 
     assert exporter.exported_spans_as_dict() == []
+
+
+@pytest.mark.vcr()  # HTTP exchange is replayed from the cassette named after this test
+def test_openrouter_streaming_reasoning(exporter: TestExporter) -> None:
+    client = openai.Client(base_url='https://openrouter.ai/api/v1')
+    logfire.instrument_openai(client)
+
+    response = client.chat.completions.create(
+        model='google/gemini-2.5-flash',
+        messages=[{'role': 'user', 'content': 'Hello, how are you? (This is a trick question)'}],
+        stream=True,
+        extra_body={'reasoning': {'effort': 'low'}},  # present in the recorded HTTP body, but absent from the snapshotted request_data below
+    )
+
+    for _ in response:  # consume every chunk; only the exported spans are asserted on
+        ...
+
+    assert exporter.exported_spans_as_dict(parse_json_attributes=True) == snapshot(
+        [
+            {
+                'name': 'Chat Completion with {request_data[model]!r}',
+                'context': {'trace_id': 1, 'span_id': 1, 'is_remote': False},
+                'parent': None,
+                'start_time': 1000000000,
+                'end_time': 2000000000,
+                'attributes': {
+                    'code.filepath': 'test_openai.py',
+                    'code.function': 'test_openrouter_streaming_reasoning',
+                    'code.lineno': 123,
+                    'request_data': {
+                        'messages': [{'role': 'user', 'content': 'Hello, how are you? (This is a trick question)'}],
+                        'model': 'google/gemini-2.5-flash',
+                        'stream': True,
+                    },
+                    'gen_ai.request.model': 'google/gemini-2.5-flash',
+                    'async': False,
+                    'logfire.msg_template': 'Chat Completion with {request_data[model]!r}',
+                    'logfire.msg': "Chat Completion with 'google/gemini-2.5-flash'",
+                    'logfire.json_schema': {
+                        'type': 'object',
+                        'properties': {'request_data': {'type': 'object'}, 'gen_ai.request.model': {}, 'async': {}},
+                    },
+                    'logfire.tags': ('LLM',),
+                    'logfire.span_type': 'span',
+                    'gen_ai.response.model': 'google/gemini-2.5-flash',
+                },
+            },
+            {
+                'name': 'streaming response from {request_data[model]!r} took {duration:.2f}s',
+                'context': {'trace_id': 2, 'span_id': 3, 'is_remote': False},
+                'parent': None,
+                'start_time': 5000000000,
+                'end_time': 5000000000,
+                'attributes': {
+                    'logfire.span_type': 'log',
+                    'logfire.level_num': 9,
+                    'logfire.msg_template': 'streaming response from {request_data[model]!r} took {duration:.2f}s',
+                    'logfire.msg': "streaming response from 'google/gemini-2.5-flash' took 1.00s",
+                    'code.filepath': 'test_openai.py',
+                    'code.function': 'test_openrouter_streaming_reasoning',
+                    'code.lineno': 123,
+                    'request_data': {
+                        'messages': [{'role': 'user', 'content': 'Hello, how are you? (This is a trick question)'}],
+                        'model': 'google/gemini-2.5-flash',
+                        'stream': True,
+                    },
+                    'gen_ai.request.model': 'google/gemini-2.5-flash',
+                    'async': False,
+                    'duration': 1.0,
+                    'response_data': {
+                        'message': {
+                            'content': """\
+That's a clever way to put it! You're right, it is a bit of a trick question for an AI.
+
+As a large language model, I don't experience emotions, have a physical body, or "feel" things in the human sense, so I can't really quantify "how" I am.
+
+However, I am fully operational, my systems are running smoothly, and I'm ready to assist you!
+
+So, while I can't genuinely answer it for myself, how are *you* doing today, and what can I help you with?\
+""",
+                            'refusal': None,
+                            'role': 'assistant',
+                            'annotations': None,
+                            'audio': None,
+                            'function_call': None,
+                            'tool_calls': None,
+                            'parsed': None,
+                            'reasoning': """\
+**Interpreting User Intent**
+
+I'm zeroing in on the core of the query. The "how are you" is basic, but the "trick question" label is key. My focus is on decoding what the user *really* wants. I'm anticipating something beyond a simple pleasantry.
+
+
+""",  # NOTE: only the first of the cassette's four reasoning deltas is captured here (same for reasoning_details)
+                            'reasoning_details': [
+                                {
+                                    'type': 'reasoning.text',
+                                    'text': """\
+**Interpreting User Intent**
+
+I'm zeroing in on the core of the query. The "how are you" is basic, but the "trick question" label is key. My focus is on decoding what the user *really* wants. I'm anticipating something beyond a simple pleasantry.
+
+
+""",
+                                    'provider': 'google-vertex',
+                                }
+                            ],
+                        },
+                        'usage': {
+                            'completion_tokens': 1003,
+                            'prompt_tokens': 13,
+                            'total_tokens': 1016,
+                            'completion_tokens_details': None,
+                            'prompt_tokens_details': None,
+                        },
+                    },
+                    'logfire.json_schema': {
+                        'type': 'object',
+                        'properties': {
+                            'request_data': {'type': 'object'},
+                            'gen_ai.request.model': {},
+                            'async': {},
+                            'duration': {},
+                            'response_data': {
+                                'type': 'object',
+                                'properties': {
+                                    'message': {
+                                        'type': 'object',
+                                        'title': 'ParsedChatCompletionMessage[object]',
+                                        'x-python-datatype': 'PydanticModel',
+                                    },
+                                    'usage': {
+                                        'type': 'object',
+                                        'title': 'CompletionUsage',
+                                        'x-python-datatype': 'PydanticModel',
+                                    },
+                                },
+                            },
+                        },
+                    },
+                    'logfire.tags': ('LLM',),
+                    'gen_ai.response.model': 'google/gemini-2.5-flash',
+                },
+            },
+        ]
+    )
diff --git a/uv.lock b/uv.lock