Commit 6a18234: add llm prompts attribute support
Parent: c2b2c9f

5 files changed: +319 -54 lines

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/llo_handler.py

39 additions, 20 deletions

@@ -132,6 +132,11 @@ class PatternConfig(TypedDict, total=False):
         "role": ROLE_ASSISTANT,
         "source": "output",
     },
+    "llm.prompts": {
+        "type": PatternType.DIRECT,
+        "role": ROLE_USER,
+        "source": "prompt",
+    },
 }
@@ -258,16 +263,17 @@ def process_spans(self, spans: Sequence[ReadableSpan]) -> List[ReadableSpan]:
         Processes a sequence of spans to extract and filter LLO attributes.

         For each span, this method:
-        1. Extracts LLO attributes and emits them as Gen AI Events
-        2. Filters out LLO attributes from the span to maintain privacy
-        3. Processes any LLO attributes in span events
+        1. Collects all LLO attributes from span attributes and all span events
+        2. Emits a single consolidated Gen AI Event with all collected LLO content
+        3. Filters out LLO attributes from the span and its events to maintain privacy
         4. Preserves non-LLO attributes in the span

         Handles LLO attributes from multiple frameworks:
         - Traceloop (indexed prompt/completion patterns and entity input/output)
-        - OpenLit (direct prompt/completion patterns)
+        - OpenLit (direct prompt/completion patterns, including from span events)
         - OpenInference (input/output values and structured messages)
         - Strands SDK (system prompts and tool results)
+        - CrewAI (tasks output and results)

         Args:
             spans: A sequence of OpenTelemetry ReadableSpan objects to process

@@ -278,8 +284,29 @@ def process_spans(self, spans: Sequence[ReadableSpan]) -> List[ReadableSpan]:
         modified_spans = []

         for span in spans:
+            # Collect all LLO attributes from both span attributes and events
+            all_llo_attributes = {}
+
+            # Collect from span attributes
+            if span.attributes is not None:
+                for key, value in span.attributes.items():
+                    if self._is_llo_attribute(key):
+                        all_llo_attributes[key] = value
+
+            # Collect from span events
+            if span.events:
+                for event in span.events:
+                    if event.attributes:
+                        for key, value in event.attributes.items():
+                            if self._is_llo_attribute(key):
+                                all_llo_attributes[key] = value
+
+            # Emit a single consolidated event if we found any LLO attributes
+            if all_llo_attributes:
+                self._emit_llo_attributes(span, all_llo_attributes)
+
+            # Filter span attributes
             if span.attributes is not None:
-                self._emit_llo_attributes(span, span.attributes)
                 updated_attributes = self._filter_attributes(span.attributes)
             else:
                 updated_attributes = None

@@ -294,27 +321,22 @@ def process_spans(self, spans: Sequence[ReadableSpan]) -> List[ReadableSpan]:
             else:
                 span._attributes = updated_attributes

-            self.process_span_events(span)
+            # Filter span events
+            self._filter_span_events(span)

             modified_spans.append(span)

         return modified_spans

-    def process_span_events(self, span: ReadableSpan) -> None:
+    def _filter_span_events(self, span: ReadableSpan) -> None:
         """
-        Process events within a span to extract and filter LLO attributes.
+        Filter LLO attributes from span events.

-        For each event in the span, this method:
-        1. Emits LLO attributes found in event attributes as Gen AI Events
-        2. Filters out LLO attributes from event attributes
-        3. Creates updated events with filtered attributes
-        4. Replaces the original span events with updated events
-
-        This ensures that LLO attributes are properly handled even when they appear
-        in span events rather than directly in the span's attributes.
+        This method removes LLO attributes from event attributes while preserving
+        the event structure and non-LLO attributes.

         Args:
-            span: The ReadableSpan to process events for
+            span: The ReadableSpan to filter events for

         Returns:
            None: The span is modified in-place

@@ -329,9 +351,6 @@ def process_span_events(self, span: ReadableSpan) -> None:
                 updated_events.append(event)
                 continue

-            if event.attributes is not None:
-                self._emit_llo_attributes(span, event.attributes, event_timestamp=event.timestamp)
-
             updated_event_attributes = self._filter_attributes(event.attributes)

             if updated_event_attributes is not None and len(updated_event_attributes) != len(event.attributes):
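Read together, the new collection pass amounts to the following standalone sketch; the handler argument is assumed to expose the _is_llo_attribute and _emit_llo_attributes methods shown in the diff above.

from typing import Any, Dict

from opentelemetry.sdk.trace import ReadableSpan

def collect_llo_attributes(handler, span: ReadableSpan) -> Dict[str, Any]:
    """Gather LLO attributes from a span and all of its events into one dict,
    so a single consolidated Gen AI Event can be emitted afterwards."""
    collected: Dict[str, Any] = {}

    # Span-level attributes
    if span.attributes is not None:
        for key, value in span.attributes.items():
            if handler._is_llo_attribute(key):
                collected[key] = value

    # Event-level attributes (e.g. OpenLit puts prompts/completions here)
    for event in span.events or ():
        if event.attributes:
            for key, value in event.attributes.items():
                if handler._is_llo_attribute(key):
                    collected[key] = value

    return collected

# Usage, mirroring process_spans:
#     llo_attrs = collect_llo_attributes(handler, span)
#     if llo_attrs:
#         handler._emit_llo_attributes(span, llo_attrs)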

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/llo_handler/test_llo_handler_events.py

101 additions, 0 deletions

@@ -460,3 +460,104 @@ def test_group_messages_by_type_missing_fields(self):
         # Complete message goes to output
         self.assertEqual(len(result["output"]), 1)
         self.assertEqual(result["output"][0], {"role": "assistant", "content": "Complete message"})
+
+    def test_emit_llo_attributes_with_llm_prompts(self):
+        """
+        Test that llm.prompts attribute is properly emitted in the input section.
+        """
+        llm_prompts_content = "[{'role': 'system', 'content': [{'text': 'You are helpful.', 'type': 'text'}]}]"
+        attributes = {
+            "llm.prompts": llm_prompts_content,
+            "gen_ai.completion.0.content": "I understand.",
+            "gen_ai.completion.0.role": "assistant",
+        }
+
+        span = self._create_mock_span(attributes)
+        span.end_time = 1234567899
+        span.instrumentation_scope = MagicMock()
+        span.instrumentation_scope.name = "test.scope"
+
+        self.llo_handler._emit_llo_attributes(span, attributes)
+
+        self.event_logger_mock.emit.assert_called_once()
+        emitted_event = self.event_logger_mock.emit.call_args[0][0]
+
+        event_body = emitted_event.body
+
+        # Check that llm.prompts is in input section
+        self.assertIn("input", event_body)
+        self.assertIn("output", event_body)
+
+        input_messages = event_body["input"]["messages"]
+        self.assertEqual(len(input_messages), 1)
+        self.assertEqual(input_messages[0]["content"], llm_prompts_content)
+        self.assertEqual(input_messages[0]["role"], "user")
+
+        # Check output section has the completion
+        output_messages = event_body["output"]["messages"]
+        self.assertEqual(len(output_messages), 1)
+        self.assertEqual(output_messages[0]["content"], "I understand.")
+        self.assertEqual(output_messages[0]["role"], "assistant")
+
+    def test_emit_llo_attributes_openlit_style_events(self):
+        """
+        Test that LLO attributes from OpenLit-style span events are collected and emitted
+        in a single consolidated event, not as separate events.
+        """
+        # This test simulates the OpenLit pattern where prompt and completion are in span events.
+        # The span processor should collect from both and emit a single event.
+
+        span_attributes = {"normal.attribute": "value"}
+
+        # Create events like OpenLit does
+        prompt_event_attrs = {"gen_ai.prompt": "Explain quantum computing"}
+        prompt_event = MagicMock(attributes=prompt_event_attrs, timestamp=1234567890)
+
+        completion_event_attrs = {"gen_ai.completion": "Quantum computing is..."}
+        completion_event = MagicMock(attributes=completion_event_attrs, timestamp=1234567891)
+
+        span = self._create_mock_span(span_attributes)
+        span.events = [prompt_event, completion_event]
+        span.end_time = 1234567899
+        span.instrumentation_scope = MagicMock()
+        span.instrumentation_scope.name = "openlit.otel.tracing"
+
+        # Process the span (this would normally be called by process_spans)
+        all_llo_attrs = {}
+
+        # Collect from span attributes
+        for key, value in span_attributes.items():
+            if self.llo_handler._is_llo_attribute(key):
+                all_llo_attrs[key] = value
+
+        # Collect from events
+        for event in span.events:
+            if event.attributes:
+                for key, value in event.attributes.items():
+                    if self.llo_handler._is_llo_attribute(key):
+                        all_llo_attrs[key] = value
+
+        # Emit consolidated event
+        self.llo_handler._emit_llo_attributes(span, all_llo_attrs)
+
+        # Verify single event was emitted with both input and output
+        self.event_logger_mock.emit.assert_called_once()
+        emitted_event = self.event_logger_mock.emit.call_args[0][0]
+
+        event_body = emitted_event.body
+
+        # Both input and output should be in the same event
+        self.assertIn("input", event_body)
+        self.assertIn("output", event_body)
+
+        # Check input section
+        input_messages = event_body["input"]["messages"]
+        self.assertEqual(len(input_messages), 1)
+        self.assertEqual(input_messages[0]["content"], "Explain quantum computing")
+        self.assertEqual(input_messages[0]["role"], "user")
+
+        # Check output section
+        output_messages = event_body["output"]["messages"]
+        self.assertEqual(len(output_messages), 1)
+        self.assertEqual(output_messages[0]["content"], "Quantum computing is...")
+        self.assertEqual(output_messages[0]["role"], "assistant")
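The consolidated body these two tests assert against has roughly this shape; the field names come straight from the assertions above, while anything beyond them (such as message ordering) is an assumption.

expected_body = {
    "input": {
        "messages": [
            {"role": "user", "content": "Explain quantum computing"},
        ],
    },
    "output": {
        "messages": [
            {"role": "assistant", "content": "Quantum computing is..."},
        ],
    },
}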

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/llo_handler/test_llo_handler_frameworks.py

49 additions, 0 deletions

@@ -393,3 +393,52 @@ def test_collect_strands_sdk_messages(self):
         self.assertIsNotNone(tool_msg)
         self.assertEqual(tool_msg["role"], "assistant")
         self.assertEqual(tool_msg["source"], "output")
+
+    def test_collect_llm_prompts_messages(self):
+        """
+        Verify llm.prompts attribute is collected as a user message with prompt source.
+        """
+        attributes = {
+            "llm.prompts": (
+                "[{'role': 'system', 'content': [{'text': 'You are a helpful AI assistant.', 'type': 'text'}]}, "
+                "{'role': 'user', 'content': [{'text': 'What are the benefits of using FastAPI?', 'type': 'text'}]}]"
+            ),
+            "other.attribute": "not collected",
+        }
+
+        span = self._create_mock_span(attributes)
+        messages = self.llo_handler._collect_all_llo_messages(span, attributes)
+
+        self.assertEqual(len(messages), 1)
+        message = messages[0]
+        self.assertEqual(message["content"], attributes["llm.prompts"])
+        self.assertEqual(message["role"], "user")
+        self.assertEqual(message["source"], "prompt")
+
+    def test_collect_llm_prompts_with_other_messages(self):
+        """
+        Verify llm.prompts works correctly alongside other LLO attributes.
+        """
+        attributes = {
+            "llm.prompts": "[{'role': 'system', 'content': 'System prompt'}]",
+            "gen_ai.prompt": "Direct prompt",
+            "gen_ai.completion": "Assistant response",
+        }
+
+        span = self._create_mock_span(attributes)
+        messages = self.llo_handler._collect_all_llo_messages(span, attributes)
+
+        self.assertEqual(len(messages), 3)
+
+        # Check llm.prompts message
+        llm_prompts_msg = next((m for m in messages if m["content"] == attributes["llm.prompts"]), None)
+        self.assertIsNotNone(llm_prompts_msg)
+        self.assertEqual(llm_prompts_msg["role"], "user")
+        self.assertEqual(llm_prompts_msg["source"], "prompt")
+
+        # Check other messages are still collected
+        direct_prompt_msg = next((m for m in messages if m["content"] == "Direct prompt"), None)
+        self.assertIsNotNone(direct_prompt_msg)
+
+        completion_msg = next((m for m in messages if m["content"] == "Assistant response"), None)
+        self.assertIsNotNone(completion_msg)
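Each collected message in these tests is a plain dict; a sketch of the shape the assertions above pin down, where source marks which section the message lands in when the event is emitted.

# Only these three keys are exercised by the assertions; any other fields
# would be an assumption.
llm_prompts_message = {
    "role": "user",      # assigned by the llm.prompts pattern config
    "source": "prompt",  # routed to the event's input section
    "content": "[{'role': 'system', 'content': 'System prompt'}]",
}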

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/llo_handler/test_llo_handler_patterns.py

6 additions, 0 deletions

@@ -73,6 +73,12 @@ def test_is_llo_attribute_strands_sdk_match(self):
         self.assertTrue(self.llo_handler._is_llo_attribute("system_prompt"))
         self.assertTrue(self.llo_handler._is_llo_attribute("tool.result"))

+    def test_is_llo_attribute_llm_prompts_match(self):
+        """
+        Verify _is_llo_attribute recognizes llm.prompts pattern.
+        """
+        self.assertTrue(self.llo_handler._is_llo_attribute("llm.prompts"))
+
     def test_build_pattern_matchers_with_missing_regex(self):
         """
         Test _build_pattern_matchers handles patterns with missing regex gracefully
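A minimal sketch of the exact-name check this test exercises, assuming DIRECT patterns are matched by attribute name; the real handler builds its matchers from the pattern config and also supports regex-based patterns (see the matcher test above), which this sketch omits.

# Assumed direct-pattern set; the handler's full set is larger.
DIRECT_LLO_PATTERNS = {"llm.prompts", "gen_ai.prompt", "gen_ai.completion"}

def is_llo_attribute(key: str) -> bool:
    """Return True when the key names a known LLO attribute."""
    return key in DIRECT_LLO_PATTERNS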
