
Commit 14d1d0b

Authored by andrewm4894, claude, and greptile-apps[bot]
fix(llma): extract model from response for OpenAI stored prompts (#395)
* fix: extract model from response for OpenAI stored prompts

  When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This change adds a fallback to extract the model from the response object when not provided in kwargs.

  Fixes PostHog/posthog#42861

* Apply suggestion from @greptile-apps[bot]

* Apply suggestion from @greptile-apps[bot]

* test: add tests for model extraction fallback and bump to 7.4.1

  - Add 8 tests covering model extraction from response for stored prompts
  - Fix utils.py to add 'unknown' fallback for consistency
  - Bump version to 7.4.1
  - Update CHANGELOG.md

* style: format utils.py with ruff

* fix: remove 'unknown' fallback from non-streaming to match original behavior

  Non-streaming originally returned None when the model wasn't in kwargs. Streaming keeps the "unknown" fallback, as that was the original behavior.

* test: add test for None model fallback in non-streaming

  Verifies that non-streaming returns None (not "unknown") when the model is not available in kwargs or the response, matching the original behavior.

---------

Co-authored-by: Claude Opus 4.5 <[email protected]>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
1 parent 80e6e43 commit 14d1d0b

File tree

6 files changed, +519 −7 lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -1,3 +1,9 @@
+# 7.4.1 - 2025-12-19
+
+fix: extract model from response for OpenAI stored prompts
+
+When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This fix adds a fallback to extract the model from the response object when not provided in kwargs, ensuring generations show up with the correct model and enabling cost calculations.
+
 # 7.4.0 - 2025-12-16
 
 feat: Add automatic retries for feature flag requests
```
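Taken together, the change is a three-step resolution order for the model name. Below is a minimal sketch of that chain as a hypothetical `resolve_model` helper (the SDK inlines this logic in the diffs that follow rather than naming it):

```python
from typing import Any, Dict, Optional


def resolve_model(kwargs: Dict[str, Any], model_from_response: Optional[str]) -> str:
    # Resolution order used by the streaming paths: the explicit `model`
    # kwarg wins, then the model reported by the response (set in the
    # OpenAI dashboard for stored prompts), then "unknown".
    return kwargs.get("model") or model_from_response or "unknown"


# Stored prompt: no `model` kwarg, so the response value is used.
assert resolve_model({}, "gpt-4.1") == "gpt-4.1"
# An explicit kwarg still takes precedence over the response.
assert resolve_model({"model": "gpt-4o-mini"}, "gpt-4.1") == "gpt-4o-mini"
# Neither available: streaming events fall back to "unknown".
assert resolve_model({}, None) == "unknown"
```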

posthog/ai/openai/openai.py

Lines changed: 27 additions & 2 deletions
```diff
@@ -124,14 +124,23 @@ def _create_streaming(
         start_time = time.time()
         usage_stats: TokenUsage = TokenUsage()
         final_content = []
+        model_from_response: Optional[str] = None
         response = self._original.create(**kwargs)
 
         def generator():
             nonlocal usage_stats
             nonlocal final_content  # noqa: F824
+            nonlocal model_from_response
 
             try:
                 for chunk in response:
+                    # Extract model from response object in chunk (for stored prompts)
+                    if hasattr(chunk, "response") and chunk.response:
+                        if model_from_response is None and hasattr(
+                            chunk.response, "model"
+                        ):
+                            model_from_response = chunk.response.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
 
@@ -161,6 +170,7 @@ def generator():
                 latency,
                 output,
                 None,  # Responses API doesn't have tools
+                model_from_response,
             )
 
         return generator()
@@ -177,6 +187,7 @@ def _capture_streaming_event(
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
@@ -189,9 +200,12 @@ def _capture_streaming_event(
         formatted_input = format_openai_streaming_input(kwargs, "responses")
         sanitized_input = sanitize_openai_response(formatted_input)
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_data = StreamingEventData(
             provider="openai",
-            model=kwargs.get("model", "unknown"),
+            model=model,
             base_url=str(self._client.base_url),
             kwargs=kwargs,
             formatted_input=sanitized_input,
@@ -320,6 +334,7 @@ def _create_streaming(
         usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+        model_from_response: Optional[str] = None
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
         kwargs["stream_options"]["include_usage"] = True
@@ -329,9 +344,14 @@ def generator():
             nonlocal usage_stats
             nonlocal accumulated_content  # noqa: F824
             nonlocal accumulated_tool_calls
+            nonlocal model_from_response
 
             try:
                 for chunk in response:
+                    # Extract model from chunk (Chat Completions chunks have model field)
+                    if model_from_response is None and hasattr(chunk, "model"):
+                        model_from_response = chunk.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
 
@@ -376,6 +396,7 @@ def generator():
                 accumulated_content,
                 tool_calls_list,
                 extract_available_tool_calls("openai", kwargs),
+                model_from_response,
             )
 
         return generator()
@@ -393,6 +414,7 @@ def _capture_streaming_event(
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
@@ -405,9 +427,12 @@ def _capture_streaming_event(
         formatted_input = format_openai_streaming_input(kwargs, "chat")
         sanitized_input = sanitize_openai(formatted_input)
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_data = StreamingEventData(
             provider="openai",
-            model=kwargs.get("model", "unknown"),
+            model=model,
             base_url=str(self._client.base_url),
             kwargs=kwargs,
             formatted_input=sanitized_input,
```
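The two streaming paths above probe chunks differently depending on the API: Responses API streaming events nest the full response object under `chunk.response`, while Chat Completions chunks carry `model` at the top level. A standalone sketch of that inspection, with `chunks` standing in for a real OpenAI stream and the helper name being hypothetical:

```python
from typing import Any, Iterable, Optional


def first_model_from_chunks(chunks: Iterable[Any]) -> Optional[str]:
    # Mirrors how `model_from_response` is captured inside the wrapped
    # generators; the SDK does this inline while it is also accumulating
    # content and usage stats.
    for chunk in chunks:
        model = getattr(chunk, "model", None)  # Chat Completions shape
        if model:
            return model
        response = getattr(chunk, "response", None)  # Responses API shape
        if response is not None and getattr(response, "model", None):
            return response.model
    return None
```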

posthog/ai/openai/openai_async.py

Lines changed: 27 additions & 2 deletions
```diff
@@ -128,14 +128,23 @@ async def _create_streaming(
         start_time = time.time()
         usage_stats: TokenUsage = TokenUsage()
         final_content = []
+        model_from_response: Optional[str] = None
         response = await self._original.create(**kwargs)
 
         async def async_generator():
             nonlocal usage_stats
             nonlocal final_content  # noqa: F824
+            nonlocal model_from_response
 
             try:
                 async for chunk in response:
+                    # Extract model from response object in chunk (for stored prompts)
+                    if hasattr(chunk, "response") and chunk.response:
+                        if model_from_response is None and hasattr(
+                            chunk.response, "model"
+                        ):
+                            model_from_response = chunk.response.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
 
@@ -166,6 +175,7 @@ async def async_generator():
                 latency,
                 output,
                 extract_available_tool_calls("openai", kwargs),
+                model_from_response,
             )
 
         return async_generator()
@@ -182,13 +192,17 @@ async def _capture_streaming_event(
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         if posthog_trace_id is None:
             posthog_trace_id = str(uuid.uuid4())
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_properties = {
             "$ai_provider": "openai",
-            "$ai_model": kwargs.get("model"),
+            "$ai_model": model,
             "$ai_model_parameters": get_model_params(kwargs),
             "$ai_input": with_privacy_mode(
                 self._client._ph_client,
@@ -350,6 +364,7 @@ async def _create_streaming(
         usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+        model_from_response: Optional[str] = None
 
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
@@ -360,9 +375,14 @@ async def async_generator():
             nonlocal usage_stats
             nonlocal accumulated_content  # noqa: F824
             nonlocal accumulated_tool_calls
+            nonlocal model_from_response
 
             try:
                 async for chunk in response:
+                    # Extract model from chunk (Chat Completions chunks have model field)
+                    if model_from_response is None and hasattr(chunk, "model"):
+                        model_from_response = chunk.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
                     if chunk_usage:
@@ -405,6 +425,7 @@ async def async_generator():
                 accumulated_content,
                 tool_calls_list,
                 extract_available_tool_calls("openai", kwargs),
+                model_from_response,
             )
 
         return async_generator()
@@ -422,13 +443,17 @@ async def _capture_streaming_event(
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         if posthog_trace_id is None:
             posthog_trace_id = str(uuid.uuid4())
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_properties = {
             "$ai_provider": "openai",
-            "$ai_model": kwargs.get("model"),
+            "$ai_model": model,
             "$ai_model_parameters": get_model_params(kwargs),
             "$ai_input": with_privacy_mode(
                 self._client._ph_client,
```
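For reference, this is the call shape the whole PR targets: a Responses API request that references a stored prompt and therefore passes no `model` kwarg. A rough sketch assuming PostHog's instrumented OpenAI client; the API keys and prompt ID are placeholders, and exact client setup may differ:

```python
import posthog
from posthog.ai.openai import OpenAI  # PostHog's wrapped OpenAI client

posthog.project_api_key = "phc_..."  # placeholder PostHog project key

client = OpenAI(api_key="sk-...", posthog_client=posthog)  # placeholder key

# Stored prompt: the model is chosen in the OpenAI dashboard, so no `model`
# kwarg appears here. Before this fix the captured $ai_model was None (or
# "unknown" for streams); now it is read from the response or its chunks.
response = client.responses.create(
    prompt={"id": "pmpt_123"},  # placeholder stored-prompt ID
)
print(response.model)  # the dashboard-configured model
```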

posthog/ai/utils.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -285,7 +285,7 @@ def call_llm_and_track_usage(
 
     event_properties = {
         "$ai_provider": provider,
-        "$ai_model": kwargs.get("model"),
+        "$ai_model": kwargs.get("model") or getattr(response, "model", None),
         "$ai_model_parameters": get_model_params(kwargs),
         "$ai_input": with_privacy_mode(
             ph_client, posthog_privacy_mode, sanitized_messages
@@ -396,7 +396,7 @@ async def call_llm_and_track_usage_async(
 
     event_properties = {
         "$ai_provider": provider,
-        "$ai_model": kwargs.get("model"),
+        "$ai_model": kwargs.get("model") or getattr(response, "model", None),
         "$ai_model_parameters": get_model_params(kwargs),
         "$ai_input": with_privacy_mode(
             ph_client, posthog_privacy_mode, sanitized_messages
```
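Note the deliberate asymmetry the commit history spells out: the non-streaming path falls back to the response's `model` attribute but then stops at `None` rather than `"unknown"`, preserving the pre-fix behavior. A small sketch of that contract, with `SimpleNamespace` standing in for a real response object and the helper name being hypothetical:

```python
from types import SimpleNamespace
from typing import Any, Dict, Optional


def non_streaming_model(kwargs: Dict[str, Any], response: Any) -> Optional[str]:
    # Mirrors the utils.py change: kwargs first, then the response object.
    # Unlike the streaming paths, there is no "unknown" fallback here.
    return kwargs.get("model") or getattr(response, "model", None)


stored_prompt_response = SimpleNamespace(model="gpt-4.1")
assert non_streaming_model({}, stored_prompt_response) == "gpt-4.1"
assert non_streaming_model({}, object()) is None  # no model anywhere -> None
```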
