Commit d8e4557

fix: streaming output event
1 parent 3a36f6a commit d8e4557

4 files changed: +202 additions, −114 deletions


llm_observability_examples.py

Lines changed: 27 additions & 8 deletions
@@ -25,23 +25,30 @@
 def main_sync():
     trace_id = str(uuid.uuid4())
     print("Trace ID:", trace_id)
+    distinct_id = "test_distinct_id"
+    properties = {"test_property": "test_value"}

     try:
-        basic_openai_call()
-        streaming_openai_call()
+        basic_openai_call(distinct_id, trace_id, properties)
+        # streaming_openai_call(distinct_id, trace_id, properties)
     except Exception as e:
         print("Error during OpenAI call:", str(e))


 async def main_async():
+    trace_id = str(uuid.uuid4())
+    print("Trace ID:", trace_id)
+    distinct_id = "test_distinct_id"
+    properties = {"test_property": "test_value"}
+
     try:
-        await basic_async_openai_call()
-        await streaming_async_openai_call()
+        await basic_async_openai_call(distinct_id, trace_id, properties)
+        await streaming_async_openai_call(distinct_id, trace_id, properties)
     except Exception as e:
         print("Error during OpenAI call:", str(e))


-def basic_openai_call():
+def basic_openai_call(distinct_id, trace_id, properties):
     response = openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -50,6 +57,9 @@ def basic_openai_call():
         ],
         max_tokens=100,
         temperature=0.7,
+        posthog_distinct_id=distinct_id,
+        posthog_trace_id=trace_id,
+        posthog_properties=properties,
     )
     if response and response.choices:
         print("OpenAI response:", response.choices[0].message.content)
@@ -58,7 +68,7 @@ def basic_openai_call():
     return response


-async def basic_async_openai_call():
+async def basic_async_openai_call(distinct_id, trace_id, properties):
     response = await async_openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -67,6 +77,9 @@ async def basic_async_openai_call():
         ],
         max_tokens=100,
         temperature=0.7,
+        posthog_distinct_id=distinct_id,
+        posthog_trace_id=trace_id,
+        posthog_properties=properties,
     )
     if response and hasattr(response, "choices"):
         print("OpenAI response:", response.choices[0].message.content)
@@ -75,7 +88,7 @@ async def basic_async_openai_call():
     return response


-def streaming_openai_call():
+def streaming_openai_call(distinct_id, trace_id, properties):
     response = openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -85,6 +98,9 @@ def streaming_openai_call():
         max_tokens=100,
         temperature=0.7,
         stream=True,
+        posthog_distinct_id=distinct_id,
+        posthog_trace_id=trace_id,
+        posthog_properties=properties,
     )

     for chunk in response:
@@ -93,7 +109,7 @@ def streaming_openai_call():
     return response


-async def streaming_async_openai_call():
+async def streaming_async_openai_call(distinct_id, trace_id, properties):
     response = await async_openai_client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
@@ -103,6 +119,9 @@ async def streaming_async_openai_call():
         max_tokens=100,
         temperature=0.7,
         stream=True,
+        posthog_distinct_id=distinct_id,
+        posthog_trace_id=trace_id,
+        posthog_properties=properties,
     )

     async for chunk in response:

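For context, the example functions above are driven through a PostHog-wrapped OpenAI client. A minimal sketch of the wiring follows; the wrapper's import path and constructor arguments are assumptions (they are not shown in this commit), inferred only from the `_ph_client` and `_openai_client` attributes used in openai.py below. Note that the posthog_* arguments are declared as named parameters of `create`, so the wrapper consumes them rather than forwarding them to OpenAI.

import uuid

from posthog import Posthog
from posthog.ai import OpenAI  # import path assumed, not shown in this commit

# Constructor arguments are assumptions based on the wrapper's attributes.
posthog_client = Posthog("<project_api_key>", host="https://us.i.posthog.com")
openai_client = OpenAI(api_key="<openai_api_key>", posthog_client=posthog_client)

trace_id = str(uuid.uuid4())
response = openai_client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=100,
    # posthog_* kwargs are named parameters of create(), not sent to OpenAI
    posthog_distinct_id="test_distinct_id",
    posthog_trace_id=trace_id,
    posthog_properties={"test_property": "test_value"},
)
print(response.choices[0].message.content)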
posthog/ai/providers/openai/openai.py

Lines changed: 92 additions & 21 deletions
@@ -1,3 +1,4 @@
+import time
 from typing import Any, Dict, Optional, Union

 try:
@@ -6,7 +7,10 @@
     raise ModuleNotFoundError("Please install OpenAI to use this feature: 'pip install openai'")

 from posthog.client import Client as PostHogClient
-from posthog.ai.utils import process_sync_streaming_response, track_usage
+from posthog.ai.utils import (
+    track_usage,
+    get_model_params,
+)


 class OpenAI:
@@ -56,31 +60,98 @@ def create(
         posthog_properties: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ):
-        """
-        Wraps openai chat completions and captures a $ai_generation event in PostHog.
-
-        PostHog-specific parameters:
-        - posthog_distinct_id: Ties the resulting event to a user in PostHog.
-        - posthog_trace_id: For grouping multiple calls into a single trace.
-        - posthog_properties: Additional custom properties for PostHog analytics.
-        """
         distinct_id = posthog_distinct_id or "anonymous_ai_user"
-
-        # If streaming, handle it separately
+
         if kwargs.get("stream", False):
-            response = self._openai_client.chat.completions.create(**kwargs)
-            return process_sync_streaming_response(
-                response=response,
-                ph_client=self._ph_client,
-                event_properties={},
-                distinct_id=distinct_id,
+            return self._create_streaming(
+                distinct_id,
+                posthog_trace_id,
+                posthog_properties,
+                **kwargs,
             )

-        # Non-streaming: let track_usage handle the request and analytics
+
         def call_method(**call_kwargs):
             return self._openai_client.chat.completions.create(**call_kwargs)

-        response = track_usage(
-            distinct_id, self._ph_client, posthog_trace_id, posthog_properties, call_method, **kwargs
+        return track_usage(
+            distinct_id,
+            self._ph_client,
+            posthog_trace_id,
+            posthog_properties,
+            call_method,
+            **kwargs,
         )
-        return response
+
+    def _create_streaming(
+        self,
+        distinct_id: str,
+        posthog_trace_id: Optional[str],
+        posthog_properties: Optional[Dict[str, Any]],
+        **kwargs: Any,
+    ):
+        start_time = time.time()
+        usage_stats: Dict[str, int] = {}
+        accumulated_content = []
+        stream_options = {"include_usage": True}
+        response = self._openai_client.chat.completions.create(**kwargs, stream_options=stream_options)
+
+        def generator():
+            nonlocal usage_stats
+            nonlocal accumulated_content
+            try:
+                for chunk in response:
+                    if hasattr(chunk, "usage") and chunk.usage:
+                        usage_stats = {
+                            k: getattr(chunk.usage, k, 0)
+                            for k in ["prompt_tokens", "completion_tokens", "total_tokens"]
+                        }
+                    if chunk.choices[0].delta.content:
+                        accumulated_content.append(chunk.choices[0].delta.content)
+                    yield chunk
+            finally:
+                end_time = time.time()
+                latency = end_time - start_time
+                output = "".join(accumulated_content)
+                self._capture_streaming_event(distinct_id, posthog_trace_id, posthog_properties, kwargs, usage_stats, latency, output)
+
+        return generator()
+
+    def _capture_streaming_event(
+        self,
+        distinct_id: str,
+        posthog_trace_id: Optional[str],
+        posthog_properties: Optional[Dict[str, Any]],
+        kwargs: Dict[str, Any],
+        usage_stats: Dict[str, int],
+        latency: float,
+        output: str,
+    ):
+
+        event_properties = {
+            "$ai_provider": "openai",
+            "$ai_model": kwargs.get("model"),
+            "$ai_model_parameters": get_model_params(kwargs),
+            "$ai_input": kwargs.get("messages"),
+            "$ai_output": {
+                "choices": [
+                    {
+                        "content": output,
+                        "role": "assistant",
+                    }
+                ]
+            },
+            "$ai_http_status": 200,
+            "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
+            "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_latency": latency,
+            "$ai_trace_id": posthog_trace_id,
+            "$ai_posthog_properties": posthog_properties,
+        }
+
+        if hasattr(self._ph_client, "capture"):
+            self._ph_client.capture(
+                distinct_id=distinct_id,
+                event="$ai_generation",
+                properties=event_properties,
+            )

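The substance of the fix is in `_create_streaming`: instead of capturing analytics before the caller has consumed anything, the wrapper returns a generator whose finally block fires the $ai_generation event exactly once when iteration ends, whether the stream is exhausted, raises, or is abandoned early (closing a generator raises GeneratorExit inside it, which still reaches finally). A self-contained sketch of that pattern, with a print standing in for `self._ph_client.capture`:

import time

def instrumented(stream):
    # Wraps any iterable so a capture-style side effect runs once at the end.
    start = time.time()
    pieces = []

    def gen():
        try:
            for chunk in stream:
                pieces.append(chunk)
                yield chunk
        finally:
            # Runs on normal exhaustion, on error, and on early close.
            print(f"captured {''.join(pieces)!r} after {time.time() - start:.3f}s")

    return gen()

for token in instrumented(["Hello", ", ", "world"]):
    print(token, end="")
print()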
posthog/ai/providers/openai/openai_async.py

Lines changed: 81 additions & 14 deletions
@@ -1,3 +1,4 @@
+import time
 from typing import Any, Dict, Optional, Union

 try:
@@ -6,7 +7,7 @@
     raise ModuleNotFoundError("Please install OpenAI to use this feature: 'pip install openai'")

 from posthog.client import Client as PostHogClient
-from posthog.ai.utils import process_async_streaming_response, track_usage_async
+from posthog.ai.utils import track_usage_async, get_model_params


 class AsyncOpenAI:
@@ -55,23 +56,15 @@ async def create(
         posthog_properties: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ):
-        """
-        Wraps openai chat completions (async) and captures a $ai_generation event in PostHog.
-
-        To use streaming in async mode:
-        async for chunk in async_openai.chat.completions.create(stream=True, ...):
-            ...
-        """
         distinct_id = posthog_distinct_id or "anonymous_ai_user"

         # If streaming, handle streaming specifically
         if kwargs.get("stream", False):
-            response = await self._openai_client.chat.completions.create(**kwargs)
-            return process_async_streaming_response(
-                response=response,
-                ph_client=self._ph_client,
-                event_properties={},
-                distinct_id=distinct_id,
+            return await self._create_streaming(
+                distinct_id,
+                posthog_trace_id,
+                posthog_properties,
+                **kwargs,
             )

         # Non-streaming: let track_usage_async handle request and analytics
@@ -82,3 +75,77 @@ async def call_async_method(**call_kwargs):
             distinct_id, self._ph_client, posthog_trace_id, posthog_properties, call_async_method, **kwargs
         )
         return response
+
+
+    async def _create_streaming(
+        self,
+        distinct_id: str,
+        posthog_trace_id: Optional[str],
+        posthog_properties: Optional[Dict[str, Any]],
+        **kwargs: Any,
+    ):
+        start_time = time.time()
+        usage_stats: Dict[str, int] = {}
+        accumulated_content = []
+        stream_options = {"include_usage": True}
+        response = await self._openai_client.chat.completions.create(**kwargs, stream_options=stream_options)
+
+        async def async_generator():
+            nonlocal usage_stats, accumulated_content
+            try:
+                async for chunk in response:
+                    if hasattr(chunk, "usage") and chunk.usage:
+                        usage_stats = {
+                            k: getattr(chunk.usage, k, 0)
+                            for k in ["prompt_tokens", "completion_tokens", "total_tokens"]
+                        }
+                    if chunk.choices[0].delta.content:
+                        accumulated_content.append(chunk.choices[0].delta.content)
+                    yield chunk
+            finally:
+                end_time = time.time()
+                latency = end_time - start_time
+                output = "".join(accumulated_content)
+                self._capture_streaming_event(distinct_id, posthog_trace_id, posthog_properties, kwargs, usage_stats, latency, output)
+
+        return async_generator()
+
+    def _capture_streaming_event(
+        self,
+        distinct_id: str,
+        posthog_trace_id: Optional[str],
+        posthog_properties: Optional[Dict[str, Any]],
+        kwargs: Dict[str, Any],
+        usage_stats: Dict[str, int],
+        latency: float,
+        output: str,
+    ):
+
+        event_properties = {
+            "$ai_provider": "openai",
+            "$ai_model": kwargs.get("model"),
+            "$ai_model_parameters": get_model_params(kwargs),
+            "$ai_input": kwargs.get("messages"),
+            "$ai_output": {
+                "choices": [
+                    {
+                        "content": output,
+                        "role": "assistant",
+                    }
+                ]
+            },
+            "$ai_http_status": 200,
+            "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
+            "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_latency": latency,
+            "$ai_trace_id": posthog_trace_id,
+            "$ai_posthog_properties": posthog_properties,
+        }
+
+        if hasattr(self._ph_client, "capture"):
+            self._ph_client.capture(
+                distinct_id=distinct_id,
+                event="$ai_generation",
+                properties=event_properties,
+            )

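For reference, consuming the wrapped async stream could look like the sketch below, assuming `async_openai_client` is an instance of the AsyncOpenAI wrapper above. One caveat: because the wrapper forces stream_options={"include_usage": True}, OpenAI appends a final chunk whose usage field is populated and whose choices list is empty, so it is safest to guard before indexing choices[0].

import asyncio

async def consume():
    # async_openai_client is assumed to be the AsyncOpenAI wrapper defined above
    stream = await async_openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Stream a short greeting."}],
        stream=True,
        posthog_distinct_id="test_distinct_id",
    )
    async for chunk in stream:
        # The trailing usage-only chunk has no choices; guard before indexing.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

asyncio.run(consume())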