import json
import logging
import re
from typing import Any, Dict, Optional

from opentelemetry._logs import get_logger
from opentelemetry._logs.severity import SeverityNumber
from opentelemetry.sdk._logs import LoggerProvider, LogRecord
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace import TraceFlags

from amazon.opentelemetry.distro.otlp_aws_logs_exporter import OTLPAwsLogExporter


class LLOHandler:
    """
    Handler for Large Language Object (LLO) data in spans.

    Processes LLO attributes emitted by various instrumentation libraries
    and converts them to standardized gen_ai log events.
    """

    # Class-level logger shared by all handler methods; set by configure_logger()
    _logger = None

    @classmethod
    def configure_logger(cls, logs_endpoint: str) -> bool:
        """
        Configure the LLO logger with the specified OTLP logs endpoint.

        Returns True on success, False if the exporter or provider could not
        be set up.
        """
        try:
            logs_exporter = OTLPAwsLogExporter(endpoint=logs_endpoint)
            logger_provider = LoggerProvider()
            logger_provider.add_log_record_processor(BatchLogRecordProcessor(logs_exporter))
            cls._logger = get_logger("llo_logger", logger_provider=logger_provider)
            return True
        except Exception as e:
            logging.error(f"Failed to configure LLO logger: {e}")
            return False

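    # Usage sketch (the endpoint below is hypothetical): configure the logger
    # once at startup, then run each span through process_span_attributes,
    # e.g. from a custom span processor's on_end hook:
    #
    #   LLOHandler.configure_logger("https://logs.us-east-1.amazonaws.com/v1/logs")
    #   cleaned = LLOHandler.process_span_attributes(span)
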
    @classmethod
    def process_span_attributes(cls, span: ReadableSpan) -> Dict[str, Any]:
        """
        Process a span's attributes, offloading LLO data to log events.

        Returns a copy of the span's attributes with LLO entries removed.
        """
        updated_attributes = {}

        for key, value in span.attributes.items():
            if cls.should_offload(key):
                log_record = cls.create_log_record_for_llo(span, key, value)

                # Emit the LLO content as a log event; note the attribute is
                # excluded from the returned dict even if emission fails
                if log_record and cls._logger:
                    try:
                        cls._logger.emit(log_record)
                    except Exception as e:
                        logging.warning(f"Failed to emit LLO log record: {e}")
            else:
                updated_attributes[key] = value

        return updated_attributes

    @staticmethod
    def should_offload(key: str) -> bool:
        """
        Determine whether an attribute key represents LLO data that should
        be offloaded to a log event.
        """
        # OpenInference patterns
        openinference_patterns = [
            "input.value",
            "output.value",
        ]
        openinference_regex = [
            r"^llm\.input_messages\.\d+\.message\.content$",
            r"^llm\.output_messages\.\d+\.message\.content$",
        ]

        # Traceloop patterns
        traceloop_patterns = [
            "traceloop.entity.input",
            "traceloop.entity.output",
        ]
        traceloop_regex = [
            r"^gen_ai\.prompt\.\d+\.content$",
            r"^gen_ai\.completion\.\d+\.content$",
        ]

        # Generic OTel gen_ai patterns (shared across libraries)
        gen_ai_patterns = [
            "message.content",
            "gen_ai.prompt",
            "gen_ai.completion",
            "gen_ai.content.revised_prompt",
        ]

        # Combine all patterns
        exact_match_patterns = openinference_patterns + traceloop_patterns + gen_ai_patterns
        regex_match_patterns = openinference_regex + traceloop_regex

        return (
            key in exact_match_patterns
            or any(re.match(pattern, key) for pattern in regex_match_patterns)
        )

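    # Examples, given the patterns above:
    #   should_offload("gen_ai.prompt.0.content")   -> True  (Traceloop regex)
    #   should_offload("input.value")               -> True  (OpenInference exact)
    #   should_offload("gen_ai.usage.input_tokens") -> False (not LLO content)
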
    @staticmethod
    def identify_instrumentation_library(span: ReadableSpan) -> str:
        """
        Identify which instrumentation library generated this span.
        """
        if hasattr(span, "instrumentation_scope") and span.instrumentation_scope:
            scope_name = span.instrumentation_scope.name
            if "openinference" in scope_name:
                return "openinference"
            if "traceloop" in scope_name:
                return "traceloop"

        return "unknown"

    @staticmethod
    def create_log_record_for_llo(span: ReadableSpan, key: str, value: Any) -> Optional[LogRecord]:
        """
        Create a log record for an LLO attribute, or None if the attribute
        should not be offloaded or yields no event.
        """
        if not LLOHandler.should_offload(key):
            return None

        # Identify the instrumentation library
        library = LLOHandler.identify_instrumentation_library(span)

        # Map the attribute to a gen_ai event based on the library
        event_info = LLOHandler.get_event_info(key, value, span, library)

        if not event_info:
            return None

        # Extract event details
        event_name = event_info["event_name"]
        body = event_info["body"]

        gen_ai_system = LLOHandler.determine_gen_ai_system(span, library)

        log_attributes = {
            "event.name": event_name,
            "gen_ai.system": gen_ai_system,
        }

        # Link the record back to the originating span via its trace context
        return LogRecord(
            timestamp=span.start_time,
            observed_timestamp=span.start_time,
            trace_id=span.context.trace_id,
            span_id=span.context.span_id,
            trace_flags=span.context.trace_flags if hasattr(span.context, "trace_flags") else TraceFlags(0x01),
            severity_text="INFO",
            severity_number=SeverityNumber.INFO,
            body=body,
            attributes=log_attributes,
        )

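    # A resulting record for a completion carries, e.g.:
    #   body       = {"message": {"content": "...", "role": "assistant"},
    #                 "index": 0, "finish_reason": "stop"}
    #   attributes = {"event.name": "gen_ai.choice", "gen_ai.system": "openai"}
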
    @staticmethod
    def get_event_info(key: str, value: Any, span: ReadableSpan, library: str) -> Optional[Dict]:
        """
        Get event information based on the attribute key and source library.
        """
        if library == "openinference":
            return LLOHandler.get_openinference_event_info(key, value, span)
        elif library == "traceloop":
            return LLOHandler.get_traceloop_event_info(key, value, span)
        else:
            # Generic handling for unknown libraries
            return LLOHandler.get_generic_event_info(key, value, span)

    @staticmethod
    def get_openinference_event_info(key: str, value: Any, span: ReadableSpan) -> Optional[Dict]:
        """
        Extract event info for OpenInference attributes.
        """
        # Input message patterns
        if re.match(r"^llm\.input_messages\.\d+\.message\.content$", key) or key == "input.value":
            return {
                "event_name": "gen_ai.user.message",
                "body": {"content": value}
            }
        # Output message patterns
        elif re.match(r"^llm\.output_messages\.\d+\.message\.content$", key) or key == "output.value":
            # Use the span's finish_reason if present; fall back to defaults
            finish_reason = "stop"
            index = 0

            for attr_key, attr_val in span.attributes.items():
                if "finish_reason" in attr_key:
                    finish_reason = attr_val
                    break

            return {
                "event_name": "gen_ai.choice",
                "body": {
                    "message": {
                        "content": value,
                        "role": "assistant"
                    },
                    "index": index,
                    "finish_reason": finish_reason
                }
            }

        # Attribute does not map to a known event
        return None

    @staticmethod
    def get_traceloop_event_info(key: str, value: Any, span: ReadableSpan) -> Optional[Dict]:
        """Extract event info for Traceloop attributes."""
        # Handle structured input/output JSON data
        if key in ("traceloop.entity.input", "traceloop.entity.output"):
            try:
                # Attempt to parse as JSON
                data = json.loads(value)

                # Input handling: extract content from inputs if available
                if key == "traceloop.entity.input":
                    content = None
                    if "inputs" in data and "input" in data["inputs"]:
                        content = data["inputs"]["input"]

                    if content:
                        return {
                            "event_name": "gen_ai.user.message",
                            "body": {"content": content}
                        }

                # Output handling: extract content from outputs if available
                else:
                    content = None
                    if "outputs" in data and "text" in data["outputs"]:
                        content = data["outputs"]["text"]

                    if content:
                        return {
                            "event_name": "gen_ai.choice",
                            "body": {
                                "message": {
                                    "content": content,
                                    "role": "assistant"
                                },
                                "index": 0,
                                "finish_reason": "stop"
                            }
                        }
            except (json.JSONDecodeError, TypeError):
                # Not valid JSON: fall back to treating the value as raw text
                if key == "traceloop.entity.input":
                    return {
                        "event_name": "gen_ai.user.message",
                        "body": {"content": value}
                    }
                return {
                    "event_name": "gen_ai.choice",
                    "body": {
                        "message": {
                            "content": value,
                            "role": "assistant"
                        },
                        "index": 0,
                        "finish_reason": "stop"
                    }
                }

        # Handle direct gen_ai attributes
        elif re.match(r"^gen_ai\.prompt\.\d+\.content$", key):
            return {
                "event_name": "gen_ai.user.message",
                "body": {"content": value}
            }
        elif re.match(r"^gen_ai\.completion\.\d+\.content$", key):
            return {
                "event_name": "gen_ai.choice",
                "body": {
                    "message": {
                        "content": value,
                        "role": "assistant"
                    },
                    "index": 0,
                    "finish_reason": "stop"
                }
            }

        return None

    @staticmethod
    def get_generic_event_info(key: str, value: Any, span: ReadableSpan) -> Optional[Dict]:
        """Extract event info for generic LLO attributes."""
        # Basic pattern detection: input/prompt vs output/completion
        if any(pattern in key for pattern in ["input", "prompt"]):
            return {
                "event_name": "gen_ai.user.message",
                "body": {"content": value}
            }
        elif any(pattern in key for pattern in ["output", "completion"]):
            return {
                "event_name": "gen_ai.choice",
                "body": {
                    "message": {
                        "content": value,
                        "role": "assistant"
                    },
                    "index": 0,
                    "finish_reason": "stop"
                }
            }

        return None

    @staticmethod
    def determine_gen_ai_system(span: ReadableSpan, library: str) -> str:
        """Determine which gen_ai system produced this span's data."""
        # Prefer the explicit gen_ai.system attribute when present
        if "gen_ai.system" in span.attributes:
            system = span.attributes["gen_ai.system"]
            if isinstance(system, str):
                return system.lower()

        # Infer the system from model name patterns
        for key, value in span.attributes.items():
            if "model" in key and isinstance(value, str):
                if any(model in value.lower() for model in ["gpt", "openai"]):
                    return "openai"
                if any(model in value.lower() for model in ["claude", "anthropic"]):
                    return "anthropic"
                if any(model in value.lower() for model in ["llama", "meta"]):
                    return "meta"

        # Library-specific defaults
        if library == "traceloop":
            # Check for a provider hint in Traceloop properties
            for key, value in span.attributes.items():
                if "provider" in key and isinstance(value, str):
                    if "openai" in value.lower():
                        return "openai"

        # Default fallback: openai is the most common system
        return "openai"
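
# A minimal end-to-end sketch (illustrative only; the endpoint URL and the
# LLOSpanProcessor class below are hypothetical, not part of this module):
#
#   from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
#
#   class LLOSpanProcessor(SpanProcessor):
#       def on_end(self, span: ReadableSpan) -> None:
#           # Emits LLO attributes as gen_ai log events; the trimmed
#           # attribute dict is returned for downstream use
#           LLOHandler.process_span_attributes(span)
#
#   LLOHandler.configure_logger("https://logs.us-east-1.amazonaws.com/v1/logs")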