
Commit 1ab9e9d

fix(llma): add $ai_tools to streaming Gemini

1 parent b3a7f72

11 files changed: +590, -244 lines

posthog/ai/anthropic/anthropic.py

Lines changed: 28 additions & 67 deletions
@@ -12,13 +12,8 @@
 
 from posthog.ai.utils import (
     call_llm_and_track_usage,
-    extract_available_tool_calls,
-    get_model_params,
-    merge_system_prompt,
-    with_privacy_mode,
 )
 from posthog.ai.anthropic.anthropic_converter import (
-    format_anthropic_streaming_content,
     extract_anthropic_usage_from_event,
     handle_anthropic_content_block_start,
     handle_anthropic_text_delta,
@@ -215,66 +210,32 @@ def _capture_streaming_event(
         content_blocks: List[Dict[str, Any]],
         accumulated_content: str,
     ):
-        if posthog_trace_id is None:
-            posthog_trace_id = str(uuid.uuid4())
-
-        # Format output using converter
-        formatted_content = format_anthropic_streaming_content(content_blocks)
-        formatted_output = []
-
-        if formatted_content:
-            formatted_output = [{"role": "assistant", "content": formatted_content}]
-        else:
-            # Fallback to accumulated content if no blocks
-            formatted_output = [
-                {
-                    "role": "assistant",
-                    "content": [{"type": "text", "text": accumulated_content}],
-                }
-            ]
-
-        event_properties = {
-            "$ai_provider": "anthropic",
-            "$ai_model": kwargs.get("model"),
-            "$ai_model_parameters": get_model_params(kwargs),
-            "$ai_input": with_privacy_mode(
-                self._client._ph_client,
-                posthog_privacy_mode,
-                merge_system_prompt(kwargs, "anthropic"),
-            ),
-            "$ai_output_choices": with_privacy_mode(
-                self._client._ph_client,
-                posthog_privacy_mode,
-                formatted_output,
-            ),
-            "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
-            "$ai_output_tokens": usage_stats.get("output_tokens", 0),
-            "$ai_cache_read_input_tokens": usage_stats.get(
-                "cache_read_input_tokens", 0
-            ),
-            "$ai_cache_creation_input_tokens": usage_stats.get(
-                "cache_creation_input_tokens", 0
+        from posthog.ai.types import StreamingEventData
+        from posthog.ai.anthropic.anthropic_converter import (
+            standardize_anthropic_usage,
+            format_anthropic_streaming_input,
+            format_anthropic_streaming_output_complete,
+        )
+        from posthog.ai.utils import capture_streaming_event
+
+        # Prepare standardized event data
+        event_data = StreamingEventData(
+            provider="anthropic",
+            model=kwargs.get("model"),
+            base_url=str(self._client.base_url),
+            kwargs=kwargs,
+            formatted_input=format_anthropic_streaming_input(kwargs),
+            formatted_output=format_anthropic_streaming_output_complete(
+                content_blocks, accumulated_content
             ),
-            "$ai_latency": latency,
-            "$ai_trace_id": posthog_trace_id,
-            "$ai_base_url": str(self._client.base_url),
-            **(posthog_properties or {}),
-        }
-
-        # Add tools if available
-        available_tools = extract_available_tool_calls("anthropic", kwargs)
-
-        if available_tools:
-            event_properties["$ai_tools"] = available_tools
-
-        if posthog_distinct_id is None:
-            event_properties["$process_person_profile"] = False
-
-        if hasattr(self._client._ph_client, "capture"):
-            self._client._ph_client.capture(
-                distinct_id=posthog_distinct_id or posthog_trace_id,
-                event="$ai_generation",
-                properties=event_properties,
-                groups=posthog_groups,
-            )
+            usage_stats=standardize_anthropic_usage(usage_stats),
+            latency=latency,
+            distinct_id=posthog_distinct_id,
+            trace_id=posthog_trace_id,
+            properties=posthog_properties,
+            privacy_mode=posthog_privacy_mode,
+            groups=posthog_groups,
+        )
+
+        # Use the common capture function
+        capture_streaming_event(self._client._ph_client, event_data)
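
Note: StreamingEventData and the shared capture_streaming_event helper live in posthog/ai/types.py and posthog/ai/utils.py, which are among the 11 changed files but are not shown on this page. As a reading aid, here is a minimal sketch of the shape StreamingEventData would need to satisfy the call sites above; every field is inferred from the keyword arguments in this diff, and the real definition may differ.

# Hypothetical sketch inferred from the call sites in this commit.
# The actual definition lives in posthog/ai/types.py (not shown here).
from typing import Any, Dict, List, Optional, TypedDict

class TokenUsage(TypedDict, total=False):
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: Optional[int]
    cache_creation_input_tokens: Optional[int]

class StreamingEventData(TypedDict, total=False):
    provider: str                       # "anthropic", "gemini", ...
    model: Optional[str]
    base_url: str
    kwargs: Dict[str, Any]              # raw API kwargs (model params, tools, ...)
    formatted_input: Any
    formatted_output: List[Dict[str, Any]]
    usage_stats: TokenUsage
    latency: float
    distinct_id: Optional[str]
    trace_id: Optional[str]
    properties: Optional[Dict[str, Any]]
    privacy_mode: bool
    groups: Optional[Dict[str, Any]]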

posthog/ai/anthropic/anthropic_async.py

Lines changed: 1 addition & 1 deletion
@@ -133,7 +133,7 @@ async def _create_streaming(
         content_blocks: List[Dict[str, Any]] = []
         tools_in_progress: Dict[str, Dict[str, Any]] = {}
         current_text_block: Optional[Dict[str, Any]] = None
-        response = super().create(**kwargs)
+        response = await super().create(**kwargs)
 
         async def generator():
             nonlocal usage_stats
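
This one-line fix matters: in the async subclass, super().create(**kwargs) returns a coroutine, so without await the surrounding code would hand a coroutine object, not the streaming response, to the generator. A self-contained sketch of the failure mode, using toy classes rather than the Anthropic SDK:

# Toy illustration of the missing-await bug fixed above; not PostHog code.
import asyncio

class Base:
    async def create(self, **kwargs):
        return ["chunk-1", "chunk-2"]  # stands in for a streaming response

class Child(Base):
    async def broken(self):
        # Bug: returns a coroutine object that is never awaited.
        return super().create()

    async def fixed(self):
        # Fix: await the parent coroutine, as in the diff above.
        return await super().create()

async def main():
    c = Child()
    print(type(await c.broken()).__name__)  # "coroutine" (plus a RuntimeWarning later)
    print(await c.fixed())                  # ['chunk-1', 'chunk-2']

asyncio.run(main())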

posthog/ai/anthropic/anthropic_converter.py

Lines changed: 65 additions & 0 deletions
@@ -15,6 +15,7 @@
     FormattedTextContent,
     StreamingContentBlock,
     StreamingUsageStats,
+    TokenUsage,
     ToolInProgress,
 )
 
@@ -320,3 +321,67 @@ def finalize_anthropic_tool_input(
             pass
 
     del tools_in_progress[block["id"]]
+
+
+def standardize_anthropic_usage(usage: Dict[str, Any]) -> TokenUsage:
+    """
+    Standardize Anthropic usage statistics to common TokenUsage format.
+
+    Anthropic already uses standard field names, so this mainly structures the data.
+
+    Args:
+        usage: Raw usage statistics from Anthropic
+
+    Returns:
+        Standardized TokenUsage dict
+    """
+    return TokenUsage(
+        input_tokens=usage.get("input_tokens", 0),
+        output_tokens=usage.get("output_tokens", 0),
+        cache_read_input_tokens=usage.get("cache_read_input_tokens"),
+        cache_creation_input_tokens=usage.get("cache_creation_input_tokens"),
+    )
+
+
+def format_anthropic_streaming_input(kwargs: Dict[str, Any]) -> Any:
+    """
+    Format Anthropic streaming input using system prompt merging.
+
+    Args:
+        kwargs: Keyword arguments passed to Anthropic API
+
+    Returns:
+        Formatted input ready for PostHog tracking
+    """
+    from posthog.ai.utils import merge_system_prompt
+
+    return merge_system_prompt(kwargs, "anthropic")
+
+
+def format_anthropic_streaming_output_complete(
+    content_blocks: List[StreamingContentBlock], accumulated_content: str
+) -> List[FormattedMessage]:
+    """
+    Format complete Anthropic streaming output.
+
+    Combines existing logic for formatting content blocks with fallback to accumulated content.
+
+    Args:
+        content_blocks: List of content blocks accumulated during streaming
+        accumulated_content: Raw accumulated text content as fallback
+
+    Returns:
+        Formatted messages ready for PostHog tracking
+    """
+    formatted_content = format_anthropic_streaming_content(content_blocks)
+
+    if formatted_content:
+        return [{"role": "assistant", "content": formatted_content}]
+    else:
+        # Fallback to accumulated content if no blocks
+        return [
+            {
+                "role": "assistant",
+                "content": [{"type": "text", "text": accumulated_content}],
+            }
+        ]
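
For illustration, a hypothetical call to the two main additions (token counts are made up; the printed shapes assume TokenUsage is a TypedDict, i.e. a plain dict at runtime):

# Hypothetical usage example; sample values are not from the test suite.
from posthog.ai.anthropic.anthropic_converter import (
    standardize_anthropic_usage,
    format_anthropic_streaming_output_complete,
)

usage = standardize_anthropic_usage(
    {"input_tokens": 42, "output_tokens": 7, "cache_read_input_tokens": 100}
)
# {'input_tokens': 42, 'output_tokens': 7,
#  'cache_read_input_tokens': 100, 'cache_creation_input_tokens': None}

# With no content blocks, the fallback path wraps the accumulated text:
messages = format_anthropic_streaming_output_complete([], "Hello!")
# [{'role': 'assistant', 'content': [{'type': 'text', 'text': 'Hello!'}]}]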

posthog/ai/gemini/gemini.py

Lines changed: 23 additions & 38 deletions
@@ -13,8 +13,7 @@
 from posthog import setup
 from posthog.ai.utils import (
     call_llm_and_track_usage,
-    get_model_params,
-    with_privacy_mode,
+    capture_streaming_event,
 )
 from posthog.ai.gemini.gemini_converter import (
     format_gemini_input,
@@ -352,42 +351,28 @@ def _capture_streaming_event(
         latency: float,
         output: str,
     ):
-        if trace_id is None:
-            trace_id = str(uuid.uuid4())
-
-        event_properties = {
-            "$ai_provider": "gemini",
-            "$ai_model": model,
-            "$ai_model_parameters": get_model_params(kwargs),
-            "$ai_input": with_privacy_mode(
-                self._ph_client,
-                privacy_mode,
-                self._format_input(contents),
-            ),
-            "$ai_output_choices": with_privacy_mode(
-                self._ph_client,
-                privacy_mode,
-                format_gemini_streaming_output(output),
-            ),
-            "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
-            "$ai_output_tokens": usage_stats.get("output_tokens", 0),
-            "$ai_latency": latency,
-            "$ai_trace_id": trace_id,
-            "$ai_base_url": self._base_url,
-            **(properties or {}),
-        }
-
-        if distinct_id is None:
-            event_properties["$process_person_profile"] = False
-
-        if hasattr(self._ph_client, "capture"):
-            self._ph_client.capture(
-                distinct_id=distinct_id,
-                event="$ai_generation",
-                properties=event_properties,
-                groups=groups,
-            )
+        from posthog.ai.types import StreamingEventData
+        from posthog.ai.gemini.gemini_converter import standardize_gemini_usage
+
+        # Prepare standardized event data
+        event_data = StreamingEventData(
+            provider="gemini",
+            model=model,
+            base_url=self._base_url,
+            kwargs=kwargs,
+            formatted_input=self._format_input(contents),
+            formatted_output=format_gemini_streaming_output(output),
+            usage_stats=standardize_gemini_usage(usage_stats),
+            latency=latency,
+            distinct_id=distinct_id,
+            trace_id=trace_id,
+            properties=properties,
+            privacy_mode=privacy_mode,
+            groups=groups,
+        )
+
+        # Use the common capture function
+        capture_streaming_event(self._ph_client, event_data)
 
     def _format_input(self, contents):
         """Format input contents for PostHog tracking"""

posthog/ai/gemini/gemini_converter.py

Lines changed: 20 additions & 0 deletions
@@ -13,6 +13,7 @@
     FormattedMessage,
     FormattedTextContent,
     StreamingUsageStats,
+    TokenUsage,
 )
 
 
@@ -344,3 +345,22 @@ def format_gemini_streaming_output(
         text = str(accumulated_content)
 
     return [{"role": "assistant", "content": [{"type": "text", "text": text}]}]
+
+
+def standardize_gemini_usage(usage: Dict[str, Any]) -> TokenUsage:
+    """
+    Standardize Gemini usage statistics to common TokenUsage format.
+
+    Gemini already uses standard field names (input_tokens/output_tokens).
+
+    Args:
+        usage: Raw usage statistics from Gemini
+
+    Returns:
+        Standardized TokenUsage dict
+    """
+    return TokenUsage(
+        input_tokens=usage.get("input_tokens", 0),
+        output_tokens=usage.get("output_tokens", 0),
+        # Gemini doesn't currently support cache or reasoning tokens
+    )
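
Unlike its Anthropic counterpart, this helper passes no cache fields, so (assuming TokenUsage is a total=False TypedDict) those keys are simply absent from the result rather than set to None:

# Hypothetical values for illustration only.
from posthog.ai.gemini.gemini_converter import standardize_gemini_usage

usage = standardize_gemini_usage({"input_tokens": 128, "output_tokens": 16})
print(usage)                               # {'input_tokens': 128, 'output_tokens': 16}
print("cache_read_input_tokens" in usage)  # False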
