Skip to content

Commit 938c212

Browse files
authored
Fix OpenAI streaming reasoning (#1232)
1 parent dac05f0 commit 938c212

File tree

4 files changed

+431
-67
lines changed

4 files changed

+431
-67
lines changed

logfire/_internal/integrations/llm_providers/openai.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,10 @@ def __init__(self):
133133
)
134134

135135
def record_chunk(self, chunk: ChatCompletionChunk) -> None:
136-
self._stream_state.handle_chunk(chunk)
136+
try:
137+
self._stream_state.handle_chunk(chunk)
138+
except Exception:
139+
pass
137140

138141
def get_response_data(self) -> Any:
139142
try:
@@ -142,10 +145,12 @@ def get_response_data(self) -> Any:
142145
# AssertionError is raised when there is no completion snapshot
143146
# Return empty content to show an empty Assistant response in the UI
144147
return {'combined_chunk_content': '', 'chunk_count': 0}
145-
return {
146-
'message': final_completion.choices[0].message if final_completion.choices else None,
147-
'usage': final_completion.usage,
148-
}
148+
if final_completion.choices:
149+
message = final_completion.choices[0].message
150+
message.role = 'assistant'
151+
else:
152+
message = None
153+
return {'message': message, 'usage': final_completion.usage}
149154
except ImportError: # pragma: no cover
150155
OpenaiChatCompletionStreamState = OpenaiCompletionStreamState # type: ignore
151156

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
interactions:
2+
- request:
3+
body: '{"messages":[{"role":"user","content":"Hello, how are you? (This is a trick
4+
question)"}],"model":"google/gemini-2.5-flash","stream":true,"reasoning":{"effort":"low"}}'
5+
headers:
6+
accept:
7+
- application/json
8+
accept-encoding:
9+
- gzip, deflate, zstd
10+
connection:
11+
- keep-alive
12+
content-length:
13+
- '166'
14+
content-type:
15+
- application/json
16+
host:
17+
- openrouter.ai
18+
user-agent:
19+
- OpenAI/Python 1.93.1
20+
x-stainless-arch:
21+
- arm64
22+
x-stainless-async:
23+
- 'false'
24+
x-stainless-lang:
25+
- python
26+
x-stainless-os:
27+
- MacOS
28+
x-stainless-package-version:
29+
- 1.93.1
30+
x-stainless-read-timeout:
31+
- '600'
32+
x-stainless-retry-count:
33+
- '0'
34+
x-stainless-runtime:
35+
- CPython
36+
x-stainless-runtime-version:
37+
- 3.12.6
38+
method: POST
39+
uri: https://openrouter.ai/api/v1/chat/completions
40+
response:
41+
body:
42+
string: ': OPENROUTER PROCESSING
43+
44+
45+
: OPENROUTER PROCESSING
46+
47+
48+
: OPENROUTER PROCESSING
49+
50+
51+
: OPENROUTER PROCESSING
52+
53+
54+
: OPENROUTER PROCESSING
55+
56+
57+
: OPENROUTER PROCESSING
58+
59+
60+
: OPENROUTER PROCESSING
61+
62+
63+
: OPENROUTER PROCESSING
64+
65+
66+
: OPENROUTER PROCESSING
67+
68+
69+
: OPENROUTER PROCESSING
70+
71+
72+
: OPENROUTER PROCESSING
73+
74+
75+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Interpreting
76+
User Intent**\n\nI''m zeroing in on the core of the query. The \"how are you\"
77+
is basic, but the \"trick question\" label is key. My focus is on decoding
78+
what the user *really* wants. I''m anticipating something beyond a simple
79+
pleasantry.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Interpreting
80+
User Intent**\n\nI''m zeroing in on the core of the query. The \"how are you\"
81+
is basic, but the \"trick question\" label is key. My focus is on decoding
82+
what the user *really* wants. I''m anticipating something beyond a simple
83+
pleasantry.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
84+
85+
86+
: OPENROUTER PROCESSING
87+
88+
89+
: OPENROUTER PROCESSING
90+
91+
92+
: OPENROUTER PROCESSING
93+
94+
95+
: OPENROUTER PROCESSING
96+
97+
98+
: OPENROUTER PROCESSING
99+
100+
101+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Deconstructing
102+
the \"Trick\"**\n\nI''ve analyzed the intent: the trick lies in the expectation
103+
gap between human and AI responses. The raw data is processed. I need to politely
104+
but directly address the inherent lack of feeling, then shift focus to my
105+
capabilities. I''m now drafting refined responses, iterating for a balance
106+
of humor, clarity, and user engagement.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Deconstructing
107+
the \"Trick\"**\n\nI''ve analyzed the intent: the trick lies in the expectation
108+
gap between human and AI responses. The raw data is processed. I need to politely
109+
but directly address the inherent lack of feeling, then shift focus to my
110+
capabilities. I''m now drafting refined responses, iterating for a balance
111+
of humor, clarity, and user engagement.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
112+
113+
114+
: OPENROUTER PROCESSING
115+
116+
117+
: OPENROUTER PROCESSING
118+
119+
120+
: OPENROUTER PROCESSING
121+
122+
123+
: OPENROUTER PROCESSING
124+
125+
126+
: OPENROUTER PROCESSING
127+
128+
129+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Formulating
130+
the Perfect Response**\n\nThe response is evolving. I''ve created multiple
131+
drafts, each aiming for the ideal blend of acknowledging the \"trick,\" explaining
132+
my AI limitations, and offering assistance. I''m now focused on fine-tuning
133+
the tone. I''m evaluating options for humor to enhance the user experience.
134+
I''m experimenting with different phrasing to ensure the response is both
135+
clever and clear, avoiding being overly verbose.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Formulating
136+
the Perfect Response**\n\nThe response is evolving. I''ve created multiple
137+
drafts, each aiming for the ideal blend of acknowledging the \"trick,\" explaining
138+
my AI limitations, and offering assistance. I''m now focused on fine-tuning
139+
the tone. I''m evaluating options for humor to enhance the user experience.
140+
I''m experimenting with different phrasing to ensure the response is both
141+
clever and clear, avoiding being overly verbose.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
142+
143+
144+
: OPENROUTER PROCESSING
145+
146+
147+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"","reasoning":"**Optimizing
148+
Response Structure**\n\nI''m now focused on the final polishing stage. The
149+
draft is finalized; it acknowledges the question''s nature, explains my AI
150+
limitations, offers a functional answer, and re-engages the user. I''m tweaking
151+
the flow, tightening the language, and optimizing the response''s overall
152+
impact, aiming for maximum clarity and user satisfaction.\n\n\n","reasoning_details":[{"type":"reasoning.text","text":"**Optimizing
153+
Response Structure**\n\nI''m now focused on the final polishing stage. The
154+
draft is finalized; it acknowledges the question''s nature, explains my AI
155+
limitations, offers a functional answer, and re-engages the user. I''m tweaking
156+
the flow, tightening the language, and optimizing the response''s overall
157+
impact, aiming for maximum clarity and user satisfaction.\n\n\n","provider":"google-vertex"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
158+
159+
160+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"That''s
161+
a clever way to put it! You''re right, it is a bit of a trick question for
162+
an AI.\n\nAs a large language model, I don''t experience emotions, have a
163+
physical body, or","reasoning":null,"reasoning_details":[]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
164+
165+
166+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":"
167+
\"feel\" things in the human sense, so I can''t really quantify \"how\" I
168+
am.\n\nHowever, I am fully operational, my systems are running smoothly, and
169+
I''m ready to assist you!\n\nSo, while I can''t genuinely answer it for myself,
170+
how are *you* doing today","reasoning":null,"reasoning_details":[]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}
171+
172+
173+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":",
174+
and what can I help you with?","reasoning":null,"reasoning_details":[]},"finish_reason":"stop","native_finish_reason":"STOP","logprobs":null}]}
175+
176+
177+
data: {"id":"gen-1751918758-SZqPQwzFgmd8JdDUxRuL","provider":"Google","model":"google/gemini-2.5-flash","object":"chat.completion.chunk","created":1751918759,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":13,"completion_tokens":1003,"total_tokens":1016}}
178+
179+
180+
data: [DONE]
181+
182+
183+
'
184+
headers:
185+
Access-Control-Allow-Origin:
186+
- '*'
187+
CF-RAY:
188+
- 95b9f5aaead53f1f-CPT
189+
Cache-Control:
190+
- no-cache
191+
Connection:
192+
- keep-alive
193+
Content-Type:
194+
- text/event-stream
195+
Date:
196+
- Mon, 07 Jul 2025 20:06:03 GMT
197+
Permissions-Policy:
198+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
199+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
200+
Referrer-Policy:
201+
- no-referrer, strict-origin-when-cross-origin
202+
Server:
203+
- cloudflare
204+
Transfer-Encoding:
205+
- chunked
206+
Vary:
207+
- Accept-Encoding
208+
X-Content-Type-Options:
209+
- nosniff
210+
status:
211+
code: 200
212+
message: OK
213+
version: 1

tests/otel_integrations/test_openai.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,3 +2149,149 @@ def test_responses_api_nonrecording(exporter: TestExporter, config_kwargs: dict[
21492149
assert response.output_text == snapshot('Hello! How can I help you today? 😊')
21502150

21512151
assert exporter.exported_spans_as_dict() == []
2152+
2153+
2154+
@pytest.mark.vcr()
2155+
def test_openrouter_streaming_reasoning(exporter: TestExporter) -> None:
2156+
client = openai.Client(base_url='https://openrouter.ai/api/v1')
2157+
logfire.instrument_openai(client)
2158+
2159+
response = client.chat.completions.create(
2160+
model='google/gemini-2.5-flash',
2161+
messages=[{'role': 'user', 'content': 'Hello, how are you? (This is a trick question)'}],
2162+
stream=True,
2163+
extra_body={'reasoning': {'effort': 'low'}},
2164+
)
2165+
2166+
for _ in response:
2167+
...
2168+
2169+
assert exporter.exported_spans_as_dict(parse_json_attributes=True) == snapshot(
2170+
[
2171+
{
2172+
'name': 'Chat Completion with {request_data[model]!r}',
2173+
'context': {'trace_id': 1, 'span_id': 1, 'is_remote': False},
2174+
'parent': None,
2175+
'start_time': 1000000000,
2176+
'end_time': 2000000000,
2177+
'attributes': {
2178+
'code.filepath': 'test_openai.py',
2179+
'code.function': 'test_openrouter_streaming_reasoning',
2180+
'code.lineno': 123,
2181+
'request_data': {
2182+
'messages': [{'role': 'user', 'content': 'Hello, how are you? (This is a trick question)'}],
2183+
'model': 'google/gemini-2.5-flash',
2184+
'stream': True,
2185+
},
2186+
'gen_ai.request.model': 'google/gemini-2.5-flash',
2187+
'async': False,
2188+
'logfire.msg_template': 'Chat Completion with {request_data[model]!r}',
2189+
'logfire.msg': "Chat Completion with 'google/gemini-2.5-flash'",
2190+
'logfire.json_schema': {
2191+
'type': 'object',
2192+
'properties': {'request_data': {'type': 'object'}, 'gen_ai.request.model': {}, 'async': {}},
2193+
},
2194+
'logfire.tags': ('LLM',),
2195+
'logfire.span_type': 'span',
2196+
'gen_ai.response.model': 'google/gemini-2.5-flash',
2197+
},
2198+
},
2199+
{
2200+
'name': 'streaming response from {request_data[model]!r} took {duration:.2f}s',
2201+
'context': {'trace_id': 2, 'span_id': 3, 'is_remote': False},
2202+
'parent': None,
2203+
'start_time': 5000000000,
2204+
'end_time': 5000000000,
2205+
'attributes': {
2206+
'logfire.span_type': 'log',
2207+
'logfire.level_num': 9,
2208+
'logfire.msg_template': 'streaming response from {request_data[model]!r} took {duration:.2f}s',
2209+
'logfire.msg': "streaming response from 'google/gemini-2.5-flash' took 1.00s",
2210+
'code.filepath': 'test_openai.py',
2211+
'code.function': 'test_openrouter_streaming_reasoning',
2212+
'code.lineno': 123,
2213+
'request_data': {
2214+
'messages': [{'role': 'user', 'content': 'Hello, how are you? (This is a trick question)'}],
2215+
'model': 'google/gemini-2.5-flash',
2216+
'stream': True,
2217+
},
2218+
'gen_ai.request.model': 'google/gemini-2.5-flash',
2219+
'async': False,
2220+
'duration': 1.0,
2221+
'response_data': {
2222+
'message': {
2223+
'content': """\
2224+
That's a clever way to put it! You're right, it is a bit of a trick question for an AI.
2225+
2226+
As a large language model, I don't experience emotions, have a physical body, or "feel" things in the human sense, so I can't really quantify "how" I am.
2227+
2228+
However, I am fully operational, my systems are running smoothly, and I'm ready to assist you!
2229+
2230+
So, while I can't genuinely answer it for myself, how are *you* doing today, and what can I help you with?\
2231+
""",
2232+
'refusal': None,
2233+
'role': 'assistant',
2234+
'annotations': None,
2235+
'audio': None,
2236+
'function_call': None,
2237+
'tool_calls': None,
2238+
'parsed': None,
2239+
'reasoning': """\
2240+
**Interpreting User Intent**
2241+
2242+
I'm zeroing in on the core of the query. The "how are you" is basic, but the "trick question" label is key. My focus is on decoding what the user *really* wants. I'm anticipating something beyond a simple pleasantry.
2243+
2244+
2245+
""",
2246+
'reasoning_details': [
2247+
{
2248+
'type': 'reasoning.text',
2249+
'text': """\
2250+
**Interpreting User Intent**
2251+
2252+
I'm zeroing in on the core of the query. The "how are you" is basic, but the "trick question" label is key. My focus is on decoding what the user *really* wants. I'm anticipating something beyond a simple pleasantry.
2253+
2254+
2255+
""",
2256+
'provider': 'google-vertex',
2257+
}
2258+
],
2259+
},
2260+
'usage': {
2261+
'completion_tokens': 1003,
2262+
'prompt_tokens': 13,
2263+
'total_tokens': 1016,
2264+
'completion_tokens_details': None,
2265+
'prompt_tokens_details': None,
2266+
},
2267+
},
2268+
'logfire.json_schema': {
2269+
'type': 'object',
2270+
'properties': {
2271+
'request_data': {'type': 'object'},
2272+
'gen_ai.request.model': {},
2273+
'async': {},
2274+
'duration': {},
2275+
'response_data': {
2276+
'type': 'object',
2277+
'properties': {
2278+
'message': {
2279+
'type': 'object',
2280+
'title': 'ParsedChatCompletionMessage[object]',
2281+
'x-python-datatype': 'PydanticModel',
2282+
},
2283+
'usage': {
2284+
'type': 'object',
2285+
'title': 'CompletionUsage',
2286+
'x-python-datatype': 'PydanticModel',
2287+
},
2288+
},
2289+
},
2290+
},
2291+
},
2292+
'logfire.tags': ('LLM',),
2293+
'gen_ai.response.model': 'google/gemini-2.5-flash',
2294+
},
2295+
},
2296+
]
2297+
)

0 commit comments

Comments
 (0)