Skip to content

Commit c0f8a05

Browse files
committed
set up llo handling abstraction
1 parent 28f493d commit c0f8a05

File tree

3 files changed

+164
-144
lines changed

3 files changed

+164
-144
lines changed
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
import re
4+
from typing import Dict, Any, List, Optional, Sequence
5+
6+
from opentelemetry.attributes import BoundedAttributes
7+
from opentelemetry.sdk.trace import ReadableSpan, Event
8+
9+
10+
class LLOHandler:
    """
    Utility class for handling Large Language Model Output (LLO) attributes.

    The handler recognises LLO attribute keys, either by exact name or by
    regular expression, and removes them from span attributes and from the
    attributes of span events.
    """

    def __init__(self):
        # Exact attribute keys that are considered LLO attributes.
        self._exact_match_patterns = [
            "traceloop.entity.input",
            "traceloop.entity.output",
            "message.content",
            "input.value",
            "output.value",
            "gen_ai.prompt",
            "gen_ai.completion",
            "gen_ai.content.revised_prompt",
        ]

        # Regex patterns for indexed LLO attribute keys. Every literal dot is
        # escaped: an unescaped "." matches any character, which would let
        # keys such as "llmXinput_messages.0.messageXcontent" slip through
        # as LLO attributes.
        self._regex_match_patterns = [
            r"^gen_ai\.prompt\.\d+\.content$",
            r"^gen_ai\.completion\.\d+\.content$",
            r"^llm\.input_messages\.\d+\.message\.content$",
            r"^llm\.output_messages\.\d+\.message\.content$",
        ]

    def is_llo_attribute(self, key: str) -> bool:
        """
        Determine if an attribute is LLO based on its key.
        Strict matching is enforced to avoid unintended behavior.

        Args:
            key: The attribute key to check

        Returns:
            True if the key represents an LLO attribute, False otherwise
        """
        if key in self._exact_match_patterns:
            return True

        # fullmatch (rather than match) is used because "$" alone also
        # matches just before a trailing newline.
        return any(
            re.fullmatch(pattern, key) is not None
            for pattern in self._regex_match_patterns
        )

    def filter_attributes(self, attributes: Dict[str, Any]) -> Dict[str, Any]:
        """
        Filter out LLO attributes from a dictionary of attributes.

        Args:
            attributes: Mapping of attribute key-value pairs. ``None`` or an
                empty mapping is tolerated and yields an empty dictionary.

        Returns:
            A new dictionary with LLO attributes removed
        """
        if not attributes:
            return {}

        return {
            key: value
            for key, value in attributes.items()
            if not self.is_llo_attribute(key)
        }

    def update_span_attributes(self, span: ReadableSpan) -> None:
        """
        Update span attributes by filtering out LLO attributes.

        The span is modified in place through its private ``_attributes``
        field. Spans with no attributes, or with no LLO attributes, are left
        untouched.

        Args:
            span: The span to update
        """
        attributes = span.attributes
        if not attributes:
            return

        updated_attributes = self.filter_attributes(attributes)

        # Filtering only ever removes keys, so an unchanged size means no
        # LLO attribute was present and the container need not be rebuilt.
        if len(updated_attributes) == len(attributes):
            return

        if isinstance(attributes, BoundedAttributes):
            # Rebuild with the same limits as the original container.
            span._attributes = BoundedAttributes(
                maxlen=attributes.maxlen,
                attributes=updated_attributes,
                immutable=attributes._immutable,
                max_value_len=attributes.max_value_len,
            )
        else:
            span._attributes = updated_attributes

    def process_span_events(self, span: ReadableSpan) -> None:
        """
        Process events within a span by filtering out LLO attributes from event attributes.

        Events that carry no LLO attributes are kept as the original objects;
        events that do are replaced by a new ``Event`` with the same name and
        timestamp and the filtered attributes. The span is modified in place
        through its private ``_events`` field.

        Args:
            span: The span containing events to process
        """
        if not span.events:
            return

        updated_events = []

        for event in span.events:
            event_attributes = event.attributes

            # Nothing to filter on this event: keep the original object.
            if not event_attributes:
                updated_events.append(event)
                continue

            updated_event_attributes = self.filter_attributes(event_attributes)

            # Filtering only removes keys, so equal sizes mean nothing changed.
            if len(updated_event_attributes) == len(event_attributes):
                updated_events.append(event)
                continue

            # Carry over the attribute count limit when the original exposes one.
            limit = None
            if isinstance(event_attributes, BoundedAttributes):
                limit = event_attributes.maxlen

            updated_events.append(
                Event(
                    name=event.name,
                    attributes=updated_event_attributes,
                    timestamp=event.timestamp,
                    limit=limit,
                )
            )

        # Update the span's events with processed events
        span._events = updated_events

    def process_spans(self, spans: Sequence[ReadableSpan]) -> List[ReadableSpan]:
        """
        Process a list of spans by filtering out LLO attributes from both
        span attributes and event attributes.

        The spans are modified in place; the returned list holds the same
        span objects in the same order.

        Args:
            spans: List of spans to process

        Returns:
            List of processed spans with LLO attributes removed
        """
        modified_spans = []

        for span in spans:
            self.update_span_attributes(span)
            self.process_span_events(span)
            modified_spans.append(span)

        return modified_spans

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/llo_sender_client.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/otlp_aws_span_exporter.py

Lines changed: 5 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33
import logging
44
from typing import Dict, Optional, Sequence
55

6-
import re
76
import requests
87

98
from amazon.opentelemetry.distro._utils import is_installed
10-
from amazon.opentelemetry.distro.llo_sender_client import LLOSenderClient
11-
from opentelemetry.attributes import BoundedAttributes
9+
from amazon.opentelemetry.distro.llo_handler import LLOHandler
1210
from opentelemetry.exporter.otlp.proto.http import Compression
1311
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
14-
from opentelemetry.sdk.trace import ReadableSpan, Event
12+
from opentelemetry.sdk.trace import ReadableSpan
1513
from opentelemetry.sdk.trace.export import SpanExportResult
1614

1715
AWS_SERVICE = "xray"
@@ -41,7 +39,7 @@ def __init__(
4139

4240
self._aws_region = None
4341
self._has_required_dependencies = False
44-
self._llo_sender_client = LLOSenderClient()
42+
self._llo_handler = LLOHandler()
4543
# Requires botocore to be installed to sign the headers. However,
4644
# some users might not need to use this exporter. In order not conflict
4745
# with existing behavior, we check for botocore before initializing this exporter.
@@ -78,123 +76,12 @@ def __init__(
7876
)
7977

8078
def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
81-
modified_spans = []
82-
83-
for span in spans:
84-
# Process span attributes
85-
updated_attributes = {}
86-
87-
# Copy all original attributes and handle LLO data
88-
for key, value in span.attributes.items():
89-
if self._should_offload(key):
90-
metadata = {
91-
"trace_id": format(span.context.trace_id, 'x'),
92-
"span_id": format(span.context.span_id, 'x'),
93-
"attribute_name": key,
94-
"span_name": span.name
95-
}
96-
97-
# Get S3 pointer from LLOSenderClient
98-
s3_pointer = self._llo_sender_client.upload(value, metadata)
99-
100-
# Store the S3 pointer instead of original value to trim span
101-
updated_attributes[key] = s3_pointer
102-
else:
103-
# Keep original value if it is not LLO
104-
updated_attributes[key] = value
105-
106-
# Update span attributes
107-
if isinstance(span.attributes, BoundedAttributes):
108-
span._attributes = BoundedAttributes(
109-
maxlen=span.attributes.maxlen,
110-
attributes=updated_attributes,
111-
immutable=span.attributes._immutable,
112-
max_value_len=span.attributes.max_value_len
113-
)
114-
else:
115-
span._attributes = updated_attributes
116-
117-
# Process span events
118-
if span.events:
119-
updated_events = []
120-
121-
for event in span.events:
122-
# Check if this event has any attributes to process
123-
if not event.attributes:
124-
updated_events.append(event) # Keep the original event
125-
continue
126-
127-
# Process event attributes for LLO content
128-
updated_event_attributes = {}
129-
need_to_update = False
130-
131-
for key, value in event.attributes.items():
132-
if self._should_offload(key):
133-
metadata = {
134-
"trace_id": format(span.context.trace_id, 'x'),
135-
"span_id": format(span.context.span_id, 'x'),
136-
"attribute_name": key,
137-
"event_name": event.name,
138-
"event_time": str(event.timestamp)
139-
}
140-
141-
s3_pointer = self._llo_sender_client.upload(value, metadata)
142-
updated_event_attributes[key] = s3_pointer
143-
need_to_update = True
144-
else:
145-
updated_event_attributes[key] = value
146-
147-
if need_to_update:
148-
# Create new Event with the updated attributes
149-
limit = None
150-
if isinstance(event.attributes, BoundedAttributes):
151-
limit = event.attributes.maxlen
152-
153-
updated_event = Event(
154-
name=event.name,
155-
attributes=updated_event_attributes,
156-
timestamp=event.timestamp,
157-
limit=limit
158-
)
159-
160-
updated_events.append(updated_event)
161-
else:
162-
# Keep the original event
163-
updated_events.append(event)
164-
165-
# Update the span's events with processed events
166-
span._events = updated_events
167-
168-
modified_spans.append(span)
79+
# Process spans to handle LLO attributes
80+
modified_spans = self._llo_handler.process_spans(spans)
16981

17082
# Export the modified spans
17183
return super().export(modified_spans)
17284

173-
def _should_offload(self, key):
174-
"""Determine if LLO based on the attribute key. Strict matching is enforced as to not introduce unintended behavior."""
175-
exact_match_patterns = [
176-
"traceloop.entity.input",
177-
"traceloop.entity.output",
178-
"message.content",
179-
"input.value",
180-
"output.value",
181-
"gen_ai.prompt",
182-
"gen_ai.completion",
183-
"gen_ai.content.revised_prompt",
184-
]
185-
186-
regex_match_patterns = [
187-
r"^gen_ai\.prompt\.\d+\.content$",
188-
r"^gen_ai\.completion\.\d+\.content$",
189-
r"^llm.input_messages\.\d+\.message.content$",
190-
r"^llm.output_messages\.\d+\.message.content$",
191-
]
192-
193-
return (
194-
any(pattern == key for pattern in exact_match_patterns) or
195-
any(re.match(pattern, key) for pattern in regex_match_patterns)
196-
)
197-
19885
# Overrides upstream's private implementation of _export. All behaviors are
19986
# the same except if the endpoint is an XRay OTLP endpoint, we will sign the request
20087
# with SigV4 in headers before sending it to the endpoint. Otherwise, we will skip signing.

0 commit comments

Comments
 (0)