@@ -109,26 +109,11 @@ def get_start_span_function():
109109
110110def truncate_messages_by_size (messages , max_bytes = MAX_GEN_AI_MESSAGE_BYTES ):
111111 # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
112- """
113- Truncate messages by removing the oldest ones until the serialized size is within limits.
114- If the last message is still too large, truncate its content instead of removing it entirely.
115-
116- This function prioritizes keeping the most recent messages while ensuring the total
117- serialized size stays under the specified byte limit. It uses the Sentry serializer
118- to get accurate size estimates that match what will actually be sent.
119-
120- Always preserves at least one message, even if content needs to be truncated.
121-
122- :param messages: List of message objects (typically with 'role' and 'content' keys)
123- :param max_bytes: Maximum allowed size in bytes for the serialized messages
124- :returns: Truncated list of messages that fits within the size limit
125- """
126112 if not messages :
127113 return messages
128114
129115 truncated_messages = list (messages )
130116
131- # First, remove older messages until we're under the limit or have only one message left
132117 while len (truncated_messages ) > 1 :
133118 serialized = serialize (
134119 truncated_messages , is_vars = False , max_value_length = round (max_bytes * 0.8 )
@@ -139,10 +124,8 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
139124 if current_size <= max_bytes :
140125 break
141126
142- truncated_messages .pop (0 ) # Remove oldest message
127+ truncated_messages .pop (0 )
143128
144- # If we still have one message but it's too large, truncate its content
145- # This ensures we always preserve at least one message
146129 if len (truncated_messages ) == 1 :
147130 serialized = serialize (
148131 truncated_messages , is_vars = False , max_value_length = round (max_bytes * 0.8 )
@@ -151,7 +134,6 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
151134 current_size = len (serialized_json .encode ("utf-8" ))
152135
153136 if current_size > max_bytes :
154- # Truncate the content of the last message
155137 last_message = truncated_messages [0 ].copy ()
156138 content = last_message .get ("content" , "" )
157139
@@ -162,71 +144,24 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
162144 return truncated_messages
163145
164146
def truncate_and_annotate_messages(
    messages, span, scope, max_bytes=MAX_GEN_AI_MESSAGE_BYTES
):
    # type: (Optional[List[Dict[str, Any]]], Any, Any, int) -> Optional[List[Dict[str, Any]]]
    """
    Truncate ``messages`` via ``truncate_messages_by_size`` and record on the
    span/scope how many messages were dropped.

    Only a change in the *number* of messages is recorded here; this function
    does not inspect whether the content of a surviving message was shortened.

    :param messages: List of message dicts, or None.
    :param span: Object providing ``span_id`` and ``set_data(key, value)``.
    :param scope: Object providing a ``_gen_ai_messages_truncated`` mapping,
        written to with ``span.span_id`` as the key.
    :param max_bytes: Size limit forwarded to ``truncate_messages_by_size``.
    :returns: The truncated message list, or None when ``messages`` is empty/None
        or the truncation result is empty.
    """
    if not messages:
        return None

    # Take the count before truncating so it reflects the caller's input.
    total = len(messages)
    kept = truncate_messages_by_size(messages, max_bytes)
    if not kept:
        return None

    dropped = total - len(kept)
    if dropped <= 0:
        # Nothing was removed: no annotation is written.
        return kept

    # Removed-message count is tracked per span on the scope; the original
    # count is attached to the span itself.
    scope._gen_ai_messages_truncated[span.span_id] = dropped
    span.set_data("_gen_ai_messages_original_count", total)
    return kept
0 commit comments