Skip to content

Commit 4edab3d

Browse files
committed
fix(llma): Gemini content
1 parent 00b4e5a commit 4edab3d

File tree

5 files changed

+201
-33
lines changed

5 files changed

+201
-33
lines changed

posthog/ai/gemini/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .gemini import Client
2+
from .gemini_converter import format_gemini_input, FormattedMessage
23

34

45
# Create a genai-like module for perfect drop-in replacement
@@ -8,4 +9,4 @@ class _GenAI:
89

910
genai = _GenAI()
1011

11-
__all__ = ["Client", "genai"]
12+
__all__ = ["Client", "genai", "format_gemini_input", "FormattedMessage"]

posthog/ai/gemini/gemini.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
get_model_params,
1717
with_privacy_mode,
1818
)
19+
from posthog.ai.gemini.gemini_converter import format_gemini_input
1920
from posthog.client import Client as PostHogClient
2021

2122

@@ -376,20 +377,7 @@ def _capture_streaming_event(
376377

377378
def _format_input(self, contents):
378379
"""Format input contents for PostHog tracking"""
379-
if isinstance(contents, str):
380-
return [{"role": "user", "content": contents}]
381-
elif isinstance(contents, list):
382-
formatted = []
383-
for item in contents:
384-
if isinstance(item, str):
385-
formatted.append({"role": "user", "content": item})
386-
elif hasattr(item, "text"):
387-
formatted.append({"role": "user", "content": item.text})
388-
else:
389-
formatted.append({"role": "user", "content": str(item)})
390-
return formatted
391-
else:
392-
return [{"role": "user", "content": str(contents)}]
380+
return format_gemini_input(contents)
393381

394382
def generate_content_stream(
395383
self,
posthog/ai/gemini/gemini_converter.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""
2+
Gemini input format converter module.
3+
4+
This module handles the conversion of various Gemini input formats into a standardized
5+
format for PostHog tracking. It eliminates code duplication between gemini.py and utils.py.
6+
"""
7+
8+
from typing import Any, Dict, List, TypedDict, Union
9+
10+
11+
class GeminiPart(TypedDict, total=False):
    """One part of a Gemini message; declared with ``total=False`` so every key is optional."""

    # Text payload of the part.
    text: str
14+
15+
16+
class GeminiMessage(TypedDict, total=False):
    """Gemini message dict in any of its accepted shapes; every key is optional."""

    # Role string attached to the message.
    role: str
    # Gemini-style list of parts (typed parts or arbitrary dicts).
    parts: List[Union[GeminiPart, Dict[str, Any]]]
    # Alternative payload: a plain string or a list of items.
    content: Union[str, List[Any]]
    # Alternative payload: a bare text field.
    text: str
22+
23+
24+
class FormattedMessage(TypedDict):
    """Normalized ``role``/``content`` pair produced for PostHog tracking."""

    # Role string of the message.
    role: str
    # Message text as a single string.
    content: str
28+
29+
30+
def _extract_text_from_parts(parts: List[Any]) -> str:
    """
    Extract and concatenate text from a parts array.

    Each part is reduced to a string as follows:

    - ``str``: used as-is.
    - ``dict`` with a ``"text"`` key: the value of that key, coerced to
      ``str``; a ``None`` value falls back to ``str(part)``.
    - object with a truthy ``text`` attribute: that attribute, coerced to
      ``str``.
    - anything else (including objects whose ``text`` is empty or ``None``):
      ``str(part)``.

    Args:
        parts: List of parts that may contain text content

    Returns:
        Text of all parts joined without a separator; ``""`` for an empty list
    """

    def _part_text(part: Any) -> str:
        if isinstance(part, str):
            return part
        if isinstance(part, dict) and "text" in part:
            text = part["text"]
            # A None payload would make str.join raise; fall back to the
            # part's string form, as the object branch does for empty text.
            return str(part) if text is None else str(text)
        text = getattr(part, "text", None)
        # str() guards the join against non-string text attributes.
        return str(text) if text else str(part)

    return "".join(_part_text(part) for part in parts)
55+
56+
57+
def _format_dict_message(item: Dict[str, Any]) -> "FormattedMessage":
    """
    Format a dictionary message into standardized format.

    The payload is taken from the first of these that applies:

    1. ``"parts"`` holding a list (Gemini-specific format): text of all parts.
    2. ``"content"``: a string as-is, a list via the parts extractor,
       anything else via ``str()``.
    3. ``"text"``: the value, coerced to ``str``.
    4. Otherwise the string form of the whole dict, reported with role
       ``"user"``.

    Args:
        item: Dictionary containing message data

    Returns:
        Formatted message with role and content; the role defaults to
        ``"user"`` when it is missing or not a string
    """
    role = item.get("role", "user")
    # Same rule as the object formatter: only string roles are reported.
    if not isinstance(role, str):
        role = "user"

    # Handle dict format with parts array (Gemini-specific format)
    parts = item.get("parts")
    if isinstance(parts, list):
        return {"role": role, "content": _extract_text_from_parts(parts)}

    # Handle dict with content field
    if "content" in item:
        content = item["content"]
        if isinstance(content, list):
            content = _extract_text_from_parts(content)
        elif not isinstance(content, str):
            content = str(content)
        return {"role": role, "content": content}

    # Handle dict with text field
    if "text" in item:
        text = item["text"]
        # Keep the declared ``content: str`` contract for non-string values.
        return {"role": role, "content": text if isinstance(text, str) else str(text)}

    # Fallback to string representation
    return {"role": "user", "content": str(item)}
88+
89+
90+
def _format_object_message(item: Any) -> "FormattedMessage":
    """
    Format an object (with attributes) into standardized format.

    The payload is taken from the first of these that applies:

    1. an iterable ``parts`` attribute: text of all parts.
    2. a ``text`` attribute that is not ``None``: its value, coerced to
       ``str``.
    3. a ``content`` attribute: a string as-is, a list via the parts
       extractor, anything else via ``str()``.
    4. Otherwise ``str(item)``, reported with role ``"user"``.

    Args:
        item: Object that may have text or parts attributes

    Returns:
        Formatted message with role and content; the role defaults to
        ``"user"`` when the attribute is missing or not a string
    """
    role = getattr(item, "role", "user")
    # Ensure role is a string
    if not isinstance(role, str):
        role = "user"

    # Handle object with parts attribute
    parts = getattr(item, "parts", None)
    if hasattr(parts, "__iter__"):
        return {"role": role, "content": _extract_text_from_parts(parts)}

    # Handle object with text attribute. A None text carries no payload, so
    # it falls through to the remaining checks instead of becoming the content.
    text = getattr(item, "text", None)
    if text is not None:
        return {"role": role, "content": text if isinstance(text, str) else str(text)}

    # Handle object with content attribute
    if hasattr(item, "content"):
        content = item.content
        if isinstance(content, list):
            content = _extract_text_from_parts(content)
        elif not isinstance(content, str):
            content = str(content)
        return {"role": role, "content": content}

    # Fallback to string representation
    return {"role": "user", "content": str(item)}
132+
133+
134+
def format_gemini_input(contents: Any) -> List["FormattedMessage"]:
    """
    Format Gemini input contents into standardized message format for PostHog tracking.

    This function handles various input formats:
    - ``None`` (no input), which yields an empty list
    - String inputs
    - List or tuple of strings, dicts, or objects
    - Single dict or object
    - Gemini-specific format with parts array

    Args:
        contents: Input contents in various possible formats

    Returns:
        List of formatted messages with role and content fields
    """

    def _format_one(item: Any) -> "FormattedMessage":
        # Bare strings are reported as user messages; dicts and objects are
        # delegated to their dedicated formatters.
        if isinstance(item, str):
            return {"role": "user", "content": item}
        if isinstance(item, dict):
            return _format_dict_message(item)
        return _format_object_message(item)

    # No input at all: report no messages rather than a literal "None" message.
    if contents is None:
        return []

    # Handle sequence input (one message per element)
    if isinstance(contents, (list, tuple)):
        return [_format_one(item) for item in contents]

    # Handle single string, dict or object input
    return [_format_one(contents)]

posthog/ai/utils.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -319,21 +319,9 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
319319
return messages
320320
return [{"role": "system", "content": kwargs.get("system")}] + messages
321321
elif provider == "gemini":
322+
from posthog.ai.gemini.gemini_converter import format_gemini_input
322323
contents = kwargs.get("contents", [])
323-
if isinstance(contents, str):
324-
return [{"role": "user", "content": contents}]
325-
elif isinstance(contents, list):
326-
formatted = []
327-
for item in contents:
328-
if isinstance(item, str):
329-
formatted.append({"role": "user", "content": item})
330-
elif hasattr(item, "text"):
331-
formatted.append({"role": "user", "content": item.text})
332-
else:
333-
formatted.append({"role": "user", "content": str(item)})
334-
return formatted
335-
else:
336-
return [{"role": "user", "content": str(contents)}]
324+
return format_gemini_input(contents)
337325

338326
# For OpenAI, handle both Chat Completions and Responses API
339327
if kwargs.get("messages") is not None:

posthog/test/ai/gemini/test_gemini.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -301,13 +301,33 @@ def test_new_client_different_input_formats(
301301
call_args = mock_client.capture.call_args[1]
302302
props = call_args["properties"]
303303
assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
304+
305+
# Test Gemini-specific format with parts array (like in the screenshot)
306+
mock_client.reset_mock()
307+
client.models.generate_content(
308+
model="gemini-2.0-flash",
309+
contents=[{"role": "user", "parts": [{"text": "hey"}]}],
310+
posthog_distinct_id="test-id"
311+
)
312+
call_args = mock_client.capture.call_args[1]
313+
props = call_args["properties"]
314+
assert props["$ai_input"] == [{"role": "user", "content": "hey"}]
315+
316+
# Test multiple parts in the parts array
317+
mock_client.reset_mock()
318+
client.models.generate_content(
319+
model="gemini-2.0-flash",
320+
contents=[{"role": "user", "parts": [{"text": "Hello "}, {"text": "world"}]}],
321+
posthog_distinct_id="test-id"
322+
)
323+
call_args = mock_client.capture.call_args[1]
324+
props = call_args["properties"]
325+
assert props["$ai_input"] == [{"role": "user", "content": "Hello world"}]
304326

305-
# Test list input
327+
# Test list input with string
306328
mock_client.capture.reset_mock()
307-
mock_part = MagicMock()
308-
mock_part.text = "List item"
309329
client.models.generate_content(
310-
model="gemini-2.0-flash", contents=[mock_part], posthog_distinct_id="test-id"
330+
model="gemini-2.0-flash", contents=["List item"], posthog_distinct_id="test-id"
311331
)
312332
call_args = mock_client.capture.call_args[1]
313333
props = call_args["properties"]

0 commit comments

Comments
 (0)