@@ -90,8 +90,8 @@ def __init__(self, logger_provider: LoggerProvider):
9090 self ._event_logger_provider = EventLoggerProvider (logger_provider = self ._logger_provider )
9191 self ._event_logger = self ._event_logger_provider .get_event_logger ("gen_ai.events" )
9292
93- # Patterns for attribute filtering
94- self ._exact_match_patterns = [
93+ # Patterns for attribute filtering - using a set for O(1) lookups
94+ self ._exact_match_patterns = {
9595 TRACELOOP_ENTITY_INPUT ,
9696 TRACELOOP_ENTITY_OUTPUT ,
9797 TRACELOOP_CREW_TASKS_OUTPUT ,
@@ -103,7 +103,7 @@ def __init__(self, logger_provider: LoggerProvider):
103103 OPENLIT_AGENT_HUMAN_INPUT ,
104104 OPENINFERENCE_INPUT_VALUE ,
105105 OPENINFERENCE_OUTPUT_VALUE ,
106- ]
106+ }
107107
108108 # Pre-compile regex patterns for better performance
109109 self ._regex_patterns = [
@@ -236,6 +236,16 @@ def _emit_llo_attributes(
236236 Returns:
237237 None: Events are emitted via the event logger
238238 """
239+ # Quick check if we have any LLO attributes before running extractors
240+ has_llo_attrs = False
241+ for key in attributes :
242+ if self ._is_llo_attribute (key ):
243+ has_llo_attrs = True
244+ break
245+
246+ if not has_llo_attrs :
247+ return
248+
239249 all_events = []
240250 all_events .extend (self ._extract_gen_ai_prompt_events (span , attributes , event_timestamp ))
241251 all_events .extend (self ._extract_gen_ai_completion_events (span , attributes , event_timestamp ))
@@ -261,8 +271,19 @@ def _filter_attributes(self, attributes: Dict[str, Any]) -> Dict[str, Any]:
261271 Returns:
262272 Dict[str, Any]: New dictionary with LLO attributes removed
263273 """
274+ # First check if we need to filter anything
275+ has_llo_attrs = False
276+ for key in attributes :
277+ if self ._is_llo_attribute (key ):
278+ has_llo_attrs = True
279+ break
280+
281+ # If no LLO attributes found, return the original attributes (no need to copy)
282+ if not has_llo_attrs :
283+ return attributes
284+
285+ # Otherwise, create filtered copy
264286 filtered_attributes = {}
265-
266287 for key , value in attributes .items ():
267288 if not self ._is_llo_attribute (key ):
268289 filtered_attributes [key ] = value
@@ -290,12 +311,16 @@ def _is_llo_attribute(self, key: str) -> bool:
290311 Returns:
291312 bool: True if the key matches any LLO pattern, False otherwise
292313 """
293- # Check exact matches first (faster )
314+ # Check exact matches first (O(1) lookup in a set )
294315 if key in self ._exact_match_patterns :
295316 return True
296317
297318 # Then check regex patterns
298- return any (pattern .match (key ) for pattern in self ._regex_patterns )
319+ for pattern in self ._regex_patterns :
320+ if pattern .match (key ):
321+ return True
322+
323+ return False
299324
300325 def _extract_gen_ai_prompt_events (
301326 self , span : ReadableSpan , attributes : Dict [str , Any ], event_timestamp : Optional [int ] = None
@@ -321,22 +346,29 @@ def _extract_gen_ai_prompt_events(
321346 Returns:
322347 List[Event]: Events created from prompt attributes
323348 """
349+ # Quick check if any prompt content attributes exist
350+ if not any (self ._prompt_content_pattern .match (key ) for key in attributes ):
351+ return []
352+
324353 events = []
325354 span_ctx = span .context
326355 gen_ai_system = span .attributes .get ("gen_ai.system" , "unknown" )
327356
328357 # Use helper method to get appropriate timestamp (prompts are inputs)
329358 prompt_timestamp = self ._get_timestamp (span , event_timestamp , is_input = True )
330359
360+ # Find all prompt content attributes and their roles
361+ prompt_content_matches = {}
331362 for key , value in attributes .items ():
332363 match = self ._prompt_content_pattern .match (key )
333- if not match :
334- continue
335-
336- index = match .group (1 )
337- role_key = f"gen_ai.prompt.{ index } .role"
338- role = attributes .get (role_key , "unknown" )
339-
364+ if match :
365+ index = match .group (1 )
366+ role_key = f"gen_ai.prompt.{ index } .role"
367+ role = attributes .get (role_key , "unknown" )
368+ prompt_content_matches [index ] = (key , value , role )
369+
370+ # Create events for each content+role pair
371+ for index , (key , value , role ) in prompt_content_matches .items ():
340372 event_attributes = {"gen_ai.system" : gen_ai_system , "original_attribute" : key }
341373 body = {"content" : value , "role" : role }
342374
@@ -376,22 +408,29 @@ def _extract_gen_ai_completion_events(
376408 Returns:
377409 List[Event]: Events created from completion attributes
378410 """
411+ # Quick check if any completion content attributes exist
412+ if not any (self ._completion_content_pattern .match (key ) for key in attributes ):
413+ return []
414+
379415 events = []
380416 span_ctx = span .context
381417 gen_ai_system = span .attributes .get ("gen_ai.system" , "unknown" )
382418
383419 # Use helper method to get appropriate timestamp (completions are outputs)
384420 completion_timestamp = self ._get_timestamp (span , event_timestamp , is_input = False )
385421
422+ # Find all completion content attributes and their roles
423+ completion_content_matches = {}
386424 for key , value in attributes .items ():
387425 match = self ._completion_content_pattern .match (key )
388- if not match :
389- continue
390-
391- index = match .group (1 )
392- role_key = f"gen_ai.completion.{ index } .role"
393- role = attributes .get (role_key , "unknown" )
394-
426+ if match :
427+ index = match .group (1 )
428+ role_key = f"gen_ai.completion.{ index } .role"
429+ role = attributes .get (role_key , "unknown" )
430+ completion_content_matches [index ] = (key , value , role )
431+
432+ # Create events for each content+role pair
433+ for index , (key , value , role ) in completion_content_matches .items ():
395434 event_attributes = {"gen_ai.system" : gen_ai_system , "original_attribute" : key }
396435 body = {"content" : value , "role" : role }
397436
@@ -437,6 +476,18 @@ def _extract_traceloop_events(
437476 Returns:
438477 List[Event]: Events created from Traceloop attributes
439478 """
479+ # Define the Traceloop attributes we're looking for
480+ traceloop_keys = {
481+ TRACELOOP_ENTITY_INPUT ,
482+ TRACELOOP_ENTITY_OUTPUT ,
483+ TRACELOOP_CREW_TASKS_OUTPUT ,
484+ TRACELOOP_CREW_RESULT ,
485+ }
486+
487+ # Quick check if any Traceloop attributes exist
488+ if not any (key in attributes for key in traceloop_keys ):
489+ return []
490+
440491 events = []
441492 span_ctx = span .context
442493 # Use traceloop.entity.name for the gen_ai.system value
@@ -521,6 +572,19 @@ def _extract_openlit_span_event_attributes(
521572 Returns:
522573 List[Event]: Events created from OpenLit attributes
523574 """
575+ # Define the OpenLit attributes we're looking for
576+ openlit_keys = {
577+ OPENLIT_PROMPT ,
578+ OPENLIT_COMPLETION ,
579+ OPENLIT_REVISED_PROMPT ,
580+ OPENLIT_AGENT_ACTUAL_OUTPUT ,
581+ OPENLIT_AGENT_HUMAN_INPUT ,
582+ }
583+
584+ # Quick check if any OpenLit attributes exist
585+ if not any (key in attributes for key in openlit_keys ):
586+ return []
587+
524588 events = []
525589 span_ctx = span .context
526590 gen_ai_system = span .attributes .get ("gen_ai.system" , "unknown" )
@@ -597,6 +661,17 @@ def _extract_openinference_attributes(
597661 Returns:
598662 List[Event]: Events created from OpenInference attributes
599663 """
664+ # Define the OpenInference keys/patterns we're looking for
665+ openinference_direct_keys = {OPENINFERENCE_INPUT_VALUE , OPENINFERENCE_OUTPUT_VALUE }
666+
667+ # Quick check if any OpenInference attributes exist
668+ has_direct_attrs = any (key in attributes for key in openinference_direct_keys )
669+ has_input_msgs = any (self ._openinference_input_msg_pattern .match (key ) for key in attributes )
670+ has_output_msgs = any (self ._openinference_output_msg_pattern .match (key ) for key in attributes )
671+
672+ if not (has_direct_attrs or has_input_msgs or has_output_msgs ):
673+ return []
674+
600675 events = []
601676 span_ctx = span .context
602677 gen_ai_system = span .attributes .get ("llm.model_name" , "unknown" )
@@ -626,15 +701,17 @@ def _extract_openinference_attributes(
626701 events .append (event )
627702
628703 # Process input messages
704+ input_messages = {}
629705 for key , value in attributes .items ():
630706 match = self ._openinference_input_msg_pattern .match (key )
631- if not match :
632- continue
633-
634- index = match .group (1 )
635- role_key = f"llm.input_messages.{ index } .message.role"
636- role = attributes .get (role_key , ROLE_USER ) # Default to user if role not specified
637-
707+ if match :
708+ index = match .group (1 )
709+ role_key = f"llm.input_messages.{ index } .message.role"
710+ role = attributes .get (role_key , ROLE_USER ) # Default to user if role not specified
711+ input_messages [index ] = (key , value , role )
712+
713+ # Create events for input messages
714+ for index , (key , value , role ) in input_messages .items ():
638715 event_attributes = {"gen_ai.system" : gen_ai_system , "original_attribute" : key }
639716 body = {"content" : value , "role" : role }
640717
@@ -648,15 +725,17 @@ def _extract_openinference_attributes(
648725 events .append (event )
649726
650727 # Process output messages
728+ output_messages = {}
651729 for key , value in attributes .items ():
652730 match = self ._openinference_output_msg_pattern .match (key )
653- if not match :
654- continue
655-
656- index = match .group (1 )
657- role_key = f"llm.output_messages.{ index } .message.role"
658- role = attributes .get (role_key , ROLE_ASSISTANT ) # Default to assistant if role not specified
659-
731+ if match :
732+ index = match .group (1 )
733+ role_key = f"llm.output_messages.{ index } .message.role"
734+ role = attributes .get (role_key , ROLE_ASSISTANT ) # Default to assistant if role not specified
735+ output_messages [index ] = (key , value , role )
736+
737+ # Create events for output messages
738+ for index , (key , value , role ) in output_messages .items ():
660739 event_attributes = {"gen_ai.system" : gen_ai_system , "original_attribute" : key }
661740 body = {"content" : value , "role" : role }
662741
0 commit comments