1+ """
2+ Gemini input format converter module.
3+
4+ This module handles the conversion of various Gemini input formats into a standardized
5+ format for PostHog tracking. It eliminates code duplication between gemini.py and utils.py.
6+ """
7+
8+ from typing import Any , Dict , List , TypedDict , Union
9+
10+
class GeminiPart(TypedDict, total=False):
    """Dictionary shape of a single part inside a Gemini message.

    Declared with ``total=False``, so the key below may be omitted.
    """

    # Plain-text payload carried by this part.
    text: str
14+
15+
class GeminiMessage(TypedDict, total=False):
    """Dictionary shape of a Gemini message.

    Declared with ``total=False``: a message may carry any subset of the
    keys below.
    """

    # Role attached to the message.
    role: str
    # Message parts: typed ``GeminiPart`` dicts or free-form dicts.
    parts: List[Union[GeminiPart, Dict[str, Any]]]
    # Payload given as a plain string or as a list of arbitrary items.
    content: Union[str, List[Any]]
    # Payload given directly as message text.
    text: str
22+
23+
class FormattedMessage(TypedDict):
    """Normalised message emitted for PostHog tracking.

    Both keys are required.
    """

    # Role of the message author.
    role: str
    # Message text as a single string.
    content: str
28+
29+
def _extract_text_from_parts(parts: List[Any]) -> str:
    """
    Extract and concatenate text from a parts array.

    Each part contributes exactly one fragment:

    - dict: the value under ``"text"``; when that key is missing or holds
      ``None`` the dict has no text and ``str(part)`` is used instead
    - str: the string itself
    - any other object: its ``text`` attribute when truthy, otherwise
      ``str(part)``

    Fragments that are not strings are converted with ``str`` so that the
    final join cannot raise ``TypeError`` on unexpected payloads.

    Args:
        parts: List of parts that may contain text content

    Returns:
        Concatenated text from all parts, joined without a separator
    """
    fragments: List[str] = []

    for part in parts:
        if isinstance(part, dict):
            text = part.get("text")
            # An empty string is still valid text; only None/missing falls back.
            fragment = part if text is None else text
        elif isinstance(part, str):
            fragment = part
        else:
            # Missing, None or empty ``text`` falls back to the object itself.
            fragment = getattr(part, "text", None) or part
        fragments.append(fragment if isinstance(fragment, str) else str(fragment))

    return "".join(fragments)
55+
56+
def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:
    """
    Format a dictionary message into standardized format.

    The payload is taken from the first usable key, in this order:
    ``parts`` (only when it is a list), ``content``, ``text``. A dict with
    none of them is represented by ``str(item)`` under the ``"user"`` role.

    Args:
        item: Dictionary containing message data

    Returns:
        Formatted message whose role and content are both strings
    """
    # Same rule as the object formatter: a missing or non-string role is "user".
    role = item.get("role", "user")
    if not isinstance(role, str):
        role = "user"

    # Handle dict format with parts array (Gemini-specific format)
    parts = item.get("parts")
    if isinstance(parts, list):
        return {"role": role, "content": _extract_text_from_parts(parts)}

    # Handle dict with content field
    if "content" in item:
        content = item["content"]
        if isinstance(content, list):
            content = _extract_text_from_parts(content)
        elif not isinstance(content, str):
            content = str(content)
        return {"role": role, "content": content}

    # Handle dict with text field; coerce so content is always a string
    if "text" in item:
        text = item["text"]
        return {"role": role, "content": text if isinstance(text, str) else str(text)}

    # Fallback to string representation (always attributed to "user")
    return {"role": "user", "content": str(item)}
88+
89+
def _object_role(item: Any) -> str:
    """Return ``item.role`` when it is a string, otherwise the default ``"user"``."""
    role = getattr(item, "role", "user")
    return role if isinstance(role, str) else "user"


def _format_object_message(item: Any) -> FormattedMessage:
    """
    Format an object (with attributes) into standardized format.

    The payload is taken from the first usable attribute, in this order:
    an iterable ``parts``, a ``text`` that is not ``None``, ``content``.
    An object with none of them is represented by ``str(item)`` under the
    ``"user"`` role.

    Args:
        item: Object that may have parts, text or content attributes

    Returns:
        Formatted message whose role and content are both strings
    """
    # Handle object with parts attribute (a missing or None ``parts`` is not iterable)
    parts = getattr(item, "parts", None)
    if hasattr(parts, "__iter__"):
        return {"role": _object_role(item), "content": _extract_text_from_parts(parts)}

    # Handle object with text attribute. A ``text`` of None means "no text",
    # so keep looking instead of emitting None as the content.
    text = getattr(item, "text", None)
    if text is not None:
        return {
            "role": _object_role(item),
            "content": text if isinstance(text, str) else str(text),
        }

    # Handle object with content attribute
    if hasattr(item, "content"):
        content = item.content
        if isinstance(content, list):
            content = _extract_text_from_parts(content)
        elif not isinstance(content, str):
            content = str(content)
        return {"role": _object_role(item), "content": content}

    # Fallback to string representation
    return {"role": "user", "content": str(item)}
132+
133+
def format_gemini_input(contents: Any) -> List[FormattedMessage]:
    """
    Normalise Gemini ``contents`` into a list of role/content messages for PostHog tracking.

    Accepted shapes:
    - a single string, dict or object, which yields a one-element list
    - a list mixing strings, dicts and objects, which yields one message per
      element in the same order

    Strings are always attributed to the ``"user"`` role; dicts and objects
    are handed to their dedicated formatters.

    Args:
        contents: Input contents in various possible formats

    Returns:
        List of formatted messages with role and content fields
    """

    def _convert(entry: Any) -> FormattedMessage:
        # One dispatch shared by list elements and a lone top-level value.
        if isinstance(entry, str):
            return {"role": "user", "content": entry}
        if isinstance(entry, dict):
            return _format_dict_message(entry)
        return _format_object_message(entry)

    if isinstance(contents, list):
        return [_convert(entry) for entry in contents]

    return [_convert(contents)]
0 commit comments