
Commit 14d1d0b

Authored by andrewm4894, claude, and greptile-apps[bot]
fix(llma): extract model from response for OpenAI stored prompts (#395)
* fix: extract model from response for OpenAI stored prompts

  When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This change adds a fallback to extract the model from the response object when not provided in kwargs.

  Fixes PostHog/posthog#42861

* Apply suggestion from @greptile-apps[bot]

* Apply suggestion from @greptile-apps[bot]

* test: add tests for model extraction fallback and bump to 7.4.1

  - Add 8 tests covering model extraction from response for stored prompts
  - Fix utils.py to add 'unknown' fallback for consistency
  - Bump version to 7.4.1
  - Update CHANGELOG.md

* style: format utils.py with ruff

* fix: remove 'unknown' fallback from non-streaming to match original behavior

  Non-streaming originally returned None when the model wasn't in kwargs. Streaming keeps the "unknown" fallback, as that was the original behavior.

* test: add test for None model fallback in non-streaming

  Verifies that non-streaming returns None (not "unknown") when the model is not available in kwargs or the response, matching the original behavior.

---------

Co-authored-by: Claude Opus 4.5 <[email protected]>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
1 parent 80e6e43 commit 14d1d0b

File tree

6 files changed, +519 −7 lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -1,3 +1,9 @@
+# 7.4.1 - 2025-12-19
+
+fix: extract model from response for OpenAI stored prompts
+
+When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This fix adds a fallback to extract the model from the response object when not provided in kwargs, ensuring generations show up with the correct model and enabling cost calculations.
+
 # 7.4.0 - 2025-12-16
 
 feat: Add automatic retries for feature flag requests
```
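Taken together, the change is a three-step resolution order for the model name. Below is a minimal sketch of that chain as a hypothetical `resolve_model` helper (the SDK inlines this logic in the diffs that follow rather than naming it):

```python
from typing import Any, Dict, Optional


def resolve_model(kwargs: Dict[str, Any], model_from_response: Optional[str]) -> str:
    # Resolution order used by the streaming paths: the explicit `model`
    # kwarg wins, then the model reported by the response (set in the
    # OpenAI dashboard for stored prompts), then "unknown".
    return kwargs.get("model") or model_from_response or "unknown"


# Stored prompt: no `model` kwarg, so the response value is used.
assert resolve_model({}, "gpt-4.1") == "gpt-4.1"
# An explicit kwarg still takes precedence over the response.
assert resolve_model({"model": "gpt-4o-mini"}, "gpt-4.1") == "gpt-4o-mini"
# Neither available: streaming events fall back to "unknown".
assert resolve_model({}, None) == "unknown"
```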

posthog/ai/openai/openai.py

Lines changed: 27 additions & 2 deletions
```diff
@@ -124,14 +124,23 @@ def _create_streaming(
         start_time = time.time()
         usage_stats: TokenUsage = TokenUsage()
         final_content = []
+        model_from_response: Optional[str] = None
         response = self._original.create(**kwargs)
 
         def generator():
             nonlocal usage_stats
             nonlocal final_content  # noqa: F824
+            nonlocal model_from_response
 
             try:
                 for chunk in response:
+                    # Extract model from response object in chunk (for stored prompts)
+                    if hasattr(chunk, "response") and chunk.response:
+                        if model_from_response is None and hasattr(
+                            chunk.response, "model"
+                        ):
+                            model_from_response = chunk.response.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
 
@@ -161,6 +170,7 @@ def generator():
                 latency,
                 output,
                 None,  # Responses API doesn't have tools
+                model_from_response,
             )
 
         return generator()
@@ -177,6 +187,7 @@ def _capture_streaming_event(
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
@@ -189,9 +200,12 @@ def _capture_streaming_event(
         formatted_input = format_openai_streaming_input(kwargs, "responses")
         sanitized_input = sanitize_openai_response(formatted_input)
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_data = StreamingEventData(
             provider="openai",
-            model=kwargs.get("model", "unknown"),
+            model=model,
             base_url=str(self._client.base_url),
             kwargs=kwargs,
             formatted_input=sanitized_input,
@@ -320,6 +334,7 @@ def _create_streaming(
         usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+        model_from_response: Optional[str] = None
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
         kwargs["stream_options"]["include_usage"] = True
@@ -329,9 +344,14 @@ def generator():
             nonlocal usage_stats
             nonlocal accumulated_content  # noqa: F824
             nonlocal accumulated_tool_calls
+            nonlocal model_from_response
 
             try:
                 for chunk in response:
+                    # Extract model from chunk (Chat Completions chunks have model field)
+                    if model_from_response is None and hasattr(chunk, "model"):
+                        model_from_response = chunk.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
 
@@ -376,6 +396,7 @@ def generator():
                 accumulated_content,
                 tool_calls_list,
                 extract_available_tool_calls("openai", kwargs),
+                model_from_response,
             )
 
         return generator()
@@ -393,6 +414,7 @@ def _capture_streaming_event(
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
@@ -405,9 +427,12 @@ def _capture_streaming_event(
         formatted_input = format_openai_streaming_input(kwargs, "chat")
         sanitized_input = sanitize_openai(formatted_input)
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_data = StreamingEventData(
             provider="openai",
-            model=kwargs.get("model", "unknown"),
+            model=model,
             base_url=str(self._client.base_url),
             kwargs=kwargs,
             formatted_input=sanitized_input,
```
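The two streaming paths above probe chunks differently depending on the API: Responses API streaming events nest the full response object under `chunk.response`, while Chat Completions chunks carry `model` at the top level. A standalone sketch of that inspection, with `chunks` standing in for a real OpenAI stream and the helper name being hypothetical:

```python
from typing import Any, Iterable, Optional


def first_model_from_chunks(chunks: Iterable[Any]) -> Optional[str]:
    # Mirrors how `model_from_response` is captured inside the wrapped
    # generators; the SDK does this inline while it is also accumulating
    # content and usage stats.
    for chunk in chunks:
        model = getattr(chunk, "model", None)  # Chat Completions shape
        if model:
            return model
        response = getattr(chunk, "response", None)  # Responses API shape
        if response is not None and getattr(response, "model", None):
            return response.model
    return None
```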

posthog/ai/openai/openai_async.py

Lines changed: 27 additions & 2 deletions
```diff
@@ -128,14 +128,23 @@ async def _create_streaming(
         start_time = time.time()
         usage_stats: TokenUsage = TokenUsage()
         final_content = []
+        model_from_response: Optional[str] = None
         response = await self._original.create(**kwargs)
 
         async def async_generator():
             nonlocal usage_stats
             nonlocal final_content  # noqa: F824
+            nonlocal model_from_response
 
             try:
                 async for chunk in response:
+                    # Extract model from response object in chunk (for stored prompts)
+                    if hasattr(chunk, "response") and chunk.response:
+                        if model_from_response is None and hasattr(
+                            chunk.response, "model"
+                        ):
+                            model_from_response = chunk.response.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
 
@@ -166,6 +175,7 @@ async def async_generator():
                 latency,
                 output,
                 extract_available_tool_calls("openai", kwargs),
+                model_from_response,
             )
 
         return async_generator()
@@ -182,13 +192,17 @@ async def _capture_streaming_event(
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         if posthog_trace_id is None:
             posthog_trace_id = str(uuid.uuid4())
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_properties = {
             "$ai_provider": "openai",
-            "$ai_model": kwargs.get("model"),
+            "$ai_model": model,
             "$ai_model_parameters": get_model_params(kwargs),
             "$ai_input": with_privacy_mode(
                 self._client._ph_client,
@@ -350,6 +364,7 @@ async def _create_streaming(
         usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+        model_from_response: Optional[str] = None
 
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
@@ -360,9 +375,14 @@ async def async_generator():
             nonlocal usage_stats
             nonlocal accumulated_content  # noqa: F824
             nonlocal accumulated_tool_calls
+            nonlocal model_from_response
 
             try:
                 async for chunk in response:
+                    # Extract model from chunk (Chat Completions chunks have model field)
+                    if model_from_response is None and hasattr(chunk, "model"):
+                        model_from_response = chunk.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
                     if chunk_usage:
@@ -405,6 +425,7 @@ async def async_generator():
                 accumulated_content,
                 tool_calls_list,
                 extract_available_tool_calls("openai", kwargs),
+                model_from_response,
             )
 
         return async_generator()
@@ -422,13 +443,17 @@ async def _capture_streaming_event(
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         if posthog_trace_id is None:
             posthog_trace_id = str(uuid.uuid4())
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_properties = {
             "$ai_provider": "openai",
-            "$ai_model": kwargs.get("model"),
+            "$ai_model": model,
             "$ai_model_parameters": get_model_params(kwargs),
             "$ai_input": with_privacy_mode(
                 self._client._ph_client,
```
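For reference, this is the call shape the whole PR targets: a Responses API request that references a stored prompt and therefore passes no `model` kwarg. A rough sketch assuming PostHog's instrumented OpenAI client; the API keys and prompt ID are placeholders, and exact client setup may differ:

```python
import posthog
from posthog.ai.openai import OpenAI  # PostHog's wrapped OpenAI client

posthog.project_api_key = "phc_..."  # placeholder PostHog project key

client = OpenAI(api_key="sk-...", posthog_client=posthog)  # placeholder key

# Stored prompt: the model is chosen in the OpenAI dashboard, so no `model`
# kwarg appears here. Before this fix the captured $ai_model was None (or
# "unknown" for streams); now it is read from the response or its chunks.
response = client.responses.create(
    prompt={"id": "pmpt_123"},  # placeholder stored-prompt ID
)
print(response.model)  # the dashboard-configured model
```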

posthog/ai/utils.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -285,7 +285,7 @@ def call_llm_and_track_usage(
 
     event_properties = {
         "$ai_provider": provider,
-        "$ai_model": kwargs.get("model"),
+        "$ai_model": kwargs.get("model") or getattr(response, "model", None),
         "$ai_model_parameters": get_model_params(kwargs),
         "$ai_input": with_privacy_mode(
             ph_client, posthog_privacy_mode, sanitized_messages
@@ -396,7 +396,7 @@ async def call_llm_and_track_usage_async(
 
     event_properties = {
         "$ai_provider": provider,
-        "$ai_model": kwargs.get("model"),
+        "$ai_model": kwargs.get("model") or getattr(response, "model", None),
         "$ai_model_parameters": get_model_params(kwargs),
         "$ai_input": with_privacy_mode(
             ph_client, posthog_privacy_mode, sanitized_messages
```
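Note the deliberate asymmetry the commit history spells out: the non-streaming path falls back to the response's `model` attribute but then stops at `None` rather than `"unknown"`, preserving the pre-fix behavior. A small sketch of that contract, with `SimpleNamespace` standing in for a real response object and the helper name being hypothetical:

```python
from types import SimpleNamespace
from typing import Any, Dict, Optional


def non_streaming_model(kwargs: Dict[str, Any], response: Any) -> Optional[str]:
    # Mirrors the utils.py change: kwargs first, then the response object.
    # Unlike the streaming paths, there is no "unknown" fallback here.
    return kwargs.get("model") or getattr(response, "model", None)


stored_prompt_response = SimpleNamespace(model="gpt-4.1")
assert non_streaming_model({}, stored_prompt_response) == "gpt-4.1"
assert non_streaming_model({}, object()) is None  # no model anywhere -> None
```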
