Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# Name of the environment variable that toggles agent-observability behavior
# (read by is_agent_observability_enabled below).
AGENT_OBSERVABILITY_ENABLED = "AGENT_OBSERVABILITY_ENABLED"


def is_installed(req: str) -> bool:
"""Is the given required package installed?"""

Expand All @@ -24,5 +25,6 @@ def is_installed(req: str) -> bool:
return False
return True


def is_agent_observability_enabled() -> bool:
    """Return True when the AGENT_OBSERVABILITY_ENABLED env var is set to "true" (case-insensitive)."""
    flag_value = os.environ.get(AGENT_OBSERVABILITY_ENABLED, "false")
    return flag_value.lower() == "true"
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from typing_extensions import override

from amazon.opentelemetry.distro._aws_attribute_keys import AWS_LOCAL_SERVICE
from amazon.opentelemetry.distro._utils import is_agent_observability_enabled
from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute
from amazon.opentelemetry.distro._utils import is_agent_observability_enabled
from amazon.opentelemetry.distro.always_record_sampler import AlwaysRecordSampler
from amazon.opentelemetry.distro.attribute_propagating_span_processor_builder import (
AttributePropagatingSpanProcessorBuilder,
Expand All @@ -22,13 +22,14 @@
AwsMetricAttributesSpanExporterBuilder,
)
from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder
from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsBatchLogRecordProcessor
from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter
from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter
from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter
from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler
from amazon.opentelemetry.distro.scope_based_exporter import ScopeBasedPeriodicExportingMetricReader
from amazon.opentelemetry.distro.scope_based_filtering_view import ScopeBasedRetainingView
from opentelemetry._logs import set_logger_provider, get_logger_provider
from opentelemetry._logs import get_logger_provider, set_logger_provider
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
Expand Down Expand Up @@ -83,6 +84,7 @@
DEPRECATED_APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG = "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT"
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG = "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT"
METRIC_EXPORT_INTERVAL_CONFIG = "OTEL_METRIC_EXPORT_INTERVAL"
OTEL_LOGS_EXPORTER = "OTEL_LOGS_EXPORTER"
DEFAULT_METRIC_EXPORT_INTERVAL = 60000.0
AWS_LAMBDA_FUNCTION_NAME_CONFIG = "AWS_LAMBDA_FUNCTION_NAME"
AWS_XRAY_DAEMON_ADDRESS_CONFIG = "AWS_XRAY_DAEMON_ADDRESS"
Expand Down Expand Up @@ -181,9 +183,9 @@ def _init_logging(
resource: Resource = None,
):

# Provides a default OTLP log exporter when none is specified.
# Provides a default OTLP log exporter when the environment is not set.
# This is the behavior for the logs exporters for other languages.
if not exporters:
if not exporters and os.environ.get(OTEL_LOGS_EXPORTER) is None:
exporters = {"otlp": OTLPLogExporter}

provider = LoggerProvider(resource=resource)
Expand All @@ -192,7 +194,11 @@ def _init_logging(
for _, exporter_class in exporters.items():
exporter_args: Dict[str, any] = {}
log_exporter = _customize_logs_exporter(exporter_class(**exporter_args), resource)
provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter))

if isinstance(log_exporter, OTLPAwsLogExporter):
provider.add_log_record_processor(AwsBatchLogRecordProcessor(exporter=log_exporter))
else:
provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter))

handler = LoggingHandler(level=NOTSET, logger_provider=provider)

Expand Down Expand Up @@ -364,12 +370,7 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) ->

if isinstance(span_exporter, OTLPSpanExporter):
if is_agent_observability_enabled():
logs_endpoint = os.getenv(OTEL_EXPORTER_OTLP_LOGS_ENDPOINT)
logs_exporter = OTLPAwsLogExporter(endpoint=logs_endpoint)
span_exporter = OTLPAwsSpanExporter(
endpoint=traces_endpoint,
logger_provider=get_logger_provider()
)
span_exporter = OTLPAwsSpanExporter(endpoint=traces_endpoint, logger_provider=get_logger_provider())
else:
span_exporter = OTLPAwsSpanExporter(endpoint=traces_endpoint)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Rough per-log overhead (everything except the body) used when estimating log
# sizes. Per the PR discussion, derived empirically from a Gen AI sample app
# (total log size minus body size), multiplied by ~3 with extra headroom for
# larger logs — an estimate, not an exact bound.
BASE_LOG_BUFFER_BYTE_SIZE = 2000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does this number come from?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran an e2e test with the Gen AI sample app and subtracted the size of the body content from the size of the entire log itself. I then multiplied by 3 and added a bit of extra buffer to account for larger logs.

# Maximum payload per request accepted by the CloudWatch Logs OTLP endpoint (1 MiB).
# https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html
MAX_LOG_REQUEST_BYTE_SIZE = 1048576
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import logging
from typing import Mapping, Optional, Sequence

from amazon.opentelemetry.distro.exporter.otlp.aws.common.constants import (
    BASE_LOG_BUFFER_BYTE_SIZE,
    MAX_LOG_REQUEST_BYTE_SIZE,
)
from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter
from opentelemetry.sdk._logs import LogData
from opentelemetry.sdk._logs._internal.export import (
    _SUPPRESS_INSTRUMENTATION_KEY,
    BatchLogExportStrategy,
    attach,
    detach,
    set_value,
)
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.util.types import AnyValue

_logger = logging.getLogger(__name__)


class AwsBatchLogRecordProcessor(BatchLogRecordProcessor):
    """BatchLogRecordProcessor that respects AWS CloudWatch's OTLP request limit.

    Estimates the serialized size of queued logs and splits a batch into
    multiple export calls whenever adding another log would push the request
    past MAX_LOG_REQUEST_BYTE_SIZE (1 MB).
    """

    def __init__(
        self,
        exporter: OTLPAwsLogExporter,
        # NOTE: Optional[...] instead of `float | None` — PEP 604 unions in
        # evaluated annotations raise TypeError on Python < 3.10.
        schedule_delay_millis: Optional[float] = None,
        max_export_batch_size: Optional[int] = None,
        export_timeout_millis: Optional[float] = None,
        max_queue_size: Optional[int] = None,
    ):
        super().__init__(
            exporter=exporter,
            schedule_delay_millis=schedule_delay_millis,
            max_export_batch_size=max_export_batch_size,
            export_timeout_millis=export_timeout_millis,
            max_queue_size=max_queue_size,
        )

    # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143
    def _export(self, batch_strategy: BatchLogExportStrategy) -> None:
        """
        Preserves existing batching behavior but will intermediately export small log
        batches if the size of the data in the batch is at or above AWS CloudWatch's
        maximum request size limit of 1 MB.

        - Data size of exported batches will ALWAYS be <= 1 MB except for the case below:
        - If the data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1
        """

        with self._export_lock:
            iteration = 0
            # We could see concurrent export calls from worker and force_flush. We call _should_export_batch
            # once the lock is obtained to see if we still need to make the requested export.
            while self._should_export_batch(batch_strategy, iteration):
                iteration += 1
                token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
                try:
                    batch_length = min(self._max_export_batch_size, len(self._queue))
                    batch_data_size = 0
                    batch = []

                    for _ in range(batch_length):
                        log_data = self._queue.pop()
                        log_size = self._get_size_of_log(log_data)

                        # Flush the accumulated batch before this log would push the
                        # request over the 1 MB cap.
                        if batch and (batch_data_size + log_size > MAX_LOG_REQUEST_BYTE_SIZE):
                            self._export_batch(batch, batch_data_size)
                            batch_data_size = 0
                            batch = []

                        batch_data_size += log_size
                        batch.append(log_data)

                    if batch:
                        self._export_batch(batch, batch_data_size)
                except Exception:  # pylint: disable=broad-exception-caught
                    _logger.exception("Exception while exporting logs.")
                finally:
                    # Always restore the context, even if something other than
                    # Exception (e.g. KeyboardInterrupt) escapes the handler above.
                    detach(token)

    def _export_batch(self, batch: Sequence[LogData], batch_data_size: int) -> None:
        """Export one batch, flagging it as Gen AI when a single oversized log exceeds the cap.

        Invariant from _export: if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE
        then len(batch) == 1 (an oversized log is always exported alone).
        """
        if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE:
            self._exporter.set_gen_ai_flag()

        self._exporter.export(batch)

    @staticmethod
    def _get_size_of_log(log_data: LogData) -> int:
        """
        Estimates the size of a given LogData based on the size of the body + a buffer
        amount representing a rough guess of other data present in the log.
        """
        size = BASE_LOG_BUFFER_BYTE_SIZE
        body = log_data.log_record.body

        if body:
            size += AwsBatchLogRecordProcessor._get_size_of_any_value(body)

        return size

    @staticmethod
    def _get_size_of_any_value(val: AnyValue) -> int:
        """
        Recursively estimates the size of an AnyValue type in bytes.

        NOTE(review): mapping keys are not counted (slight underestimate), and a
        cyclic structure would recurse without bound — acceptable for the
        OTel AnyValue tree, which is acyclic by construction; confirm if callers
        can pass arbitrary objects.
        """
        if isinstance(val, (str, bytes)):
            return len(val)

        # bool must be tested before int (bool subclasses int); sizes match the
        # JSON serializations "true" (4) and "false" (5).
        if isinstance(val, bool):
            return 4 if val else 5

        if isinstance(val, (int, float)):
            return len(str(val))

        size = 0

        if isinstance(val, Sequence):
            for element in val:
                size += AwsBatchLogRecordProcessor._get_size_of_any_value(element)

        if isinstance(val, Mapping):
            for element in val.values():
                size += AwsBatchLogRecordProcessor._get_size_of_any_value(element)

        return size
Loading