From c34f3b92d76892863054bebae79ba584baeef898 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 18 Jun 2025 23:17:16 +0000 Subject: [PATCH 01/52] add logs pipeline --- .../distro/aws_opentelemetry_configurator.py | 15 +- .../logs/aws_batch_log_record_processor.py | 160 ++++++++++++ .../otlp/aws/logs/otlp_aws_logs_exporter.py | 161 +++++++++++- .../otlp/aws/common/test_aws_auth_session.py | 63 +++++ .../aws_batch_log_record_processor_test.py | 236 ++++++++++++++++++ .../aws/logs/otlp_aws_logs_exporter_test.py | 180 +++++++++++++ 6 files changed, 810 insertions(+), 5 deletions(-) create mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index a08374bbe..b21bc6151 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -4,7 +4,7 @@ import os import re from logging import NOTSET, Logger, getLogger -from typing import ClassVar, Dict, List, Type, Union +from typing import ClassVar, Dict, List, Optional, Type, Union from importlib_metadata import version from typing_extensions import override @@ -22,6 +22,7 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsBatchLogRecordProcessor from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter @@ -181,7 +182,9 @@ def _init_logging( # Provides a default OTLP log exporter when none is specified. # This is the behavior for the logs exporters for other languages. 

-    if not exporters:
+    logs_exporter = os.environ.get("OTEL_LOGS_EXPORTER")
+
+    if not exporters and logs_exporter and (logs_exporter.lower() != "none"):
         exporters = {"otlp": OTLPLogExporter}
 
     provider = LoggerProvider(resource=resource)
@@ -190,7 +193,11 @@ def _init_logging(
     for _, exporter_class in exporters.items():
         exporter_args: Dict[str, any] = {}
         log_exporter = _customize_logs_exporter(exporter_class(**exporter_args), resource)
-        provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter))
+
+        if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled():
+            provider.add_log_record_processor(AwsBatchLogRecordProcessor(exporter=log_exporter))
+        else:
+            provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter))
 
     handler = LoggingHandler(level=NOTSET, logger_provider=provider)
 
@@ -532,7 +539,7 @@ def _is_lambda_environment():
     return AWS_LAMBDA_FUNCTION_NAME_CONFIG in os.environ
 
 
-def _is_aws_otlp_endpoint(otlp_endpoint: str = None, service: str = "xray") -> bool:
+def _is_aws_otlp_endpoint(otlp_endpoint: Optional[str] = None, service: str = "xray") -> bool:
     """Is the given endpoint an AWS OTLP endpoint?"""
 
     pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == "xray" else AWS_LOGS_OTLP_ENDPOINT_PATTERN
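For context, the configurator change above makes the default logs pipeline opt-in: a default OTLPLogExporter is installed only when OTEL_LOGS_EXPORTER is set to a value other than "none", and the AWS batch processor is used only for an OTLPAwsLogExporter when agent observability is enabled. A minimal sketch of exercising this path (illustrative only; AGENT_OBSERVABILITY_ENABLED is assumed here to be the flag behind is_agent_observability_enabled()):

    import os

    # Standard OTel environment variables; the logs endpoint matches the
    # AWS_LOGS_OTLP_ENDPOINT_PATTERN checked by _is_aws_otlp_endpoint().
    os.environ["OTEL_LOGS_EXPORTER"] = "otlp"
    os.environ["OTEL_EXPORTER_OTLP_LOGS_ENDPOINT"] = "https://logs.us-east-1.amazonaws.com/v1/logs"

    # Assumed flag name: with this enabled, _init_logging() wraps the
    # OTLPAwsLogExporter in an AwsBatchLogRecordProcessor instead of the
    # stock BatchLogRecordProcessor.
    os.environ["AGENT_OBSERVABILITY_ENABLED"] = "true"
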
diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py
new file mode 100644
index 000000000..8feada9a0
--- /dev/null
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py
@@ -0,0 +1,160 @@
+import logging
+from typing import Mapping, Optional, Sequence, cast
+
+from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter
+from opentelemetry.context import (
+    _SUPPRESS_INSTRUMENTATION_KEY,
+    attach,
+    detach,
+    set_value,
+)
+from opentelemetry.sdk._logs import LogData
+from opentelemetry.sdk._logs._internal.export import BatchLogExportStrategy
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+from opentelemetry.util.types import AnyValue
+
+_logger = logging.getLogger(__name__)
+
+
+class AwsBatchLogRecordProcessor(BatchLogRecordProcessor):
+    _BASE_LOG_BUFFER_BYTE_SIZE = (
+        2000  # Buffer size in bytes to account for log metadata not included in the body size calculation
+    )
+    _MAX_LOG_REQUEST_BYTE_SIZE = (
+        1048576  # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html
+    )
+
+    def __init__(
+        self,
+        exporter: OTLPAwsLogExporter,
+        schedule_delay_millis: Optional[float] = None,
+        max_export_batch_size: Optional[int] = None,
+        export_timeout_millis: Optional[float] = None,
+        max_queue_size: Optional[int] = None,
+    ):
+
+        super().__init__(
+            exporter=exporter,
+            schedule_delay_millis=schedule_delay_millis,
+            max_export_batch_size=max_export_batch_size,
+            export_timeout_millis=export_timeout_millis,
+            max_queue_size=max_queue_size,
+        )
+
+        self._exporter = exporter
+
+    # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143
+    def _export(self, batch_strategy: BatchLogExportStrategy) -> None:
+        """
+        Preserves existing batching behavior but will export intermediate, smaller log batches if
+        the size of the data in the batch is at or above AWS CloudWatch's maximum request size limit of 1 MB.
+
+        - Data size of exported batches will ALWAYS be <= 1 MB except for the case below:
+        - If the data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1
+        """
+        with self._export_lock:
+            iteration = 0
+            # We could see concurrent export calls from worker and force_flush. We call _should_export_batch
+            # once the lock is obtained to see if we still need to make the requested export.
+            while self._should_export_batch(batch_strategy, iteration):
+                iteration += 1
+                token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
+                try:
+                    batch_length = min(self._max_export_batch_size, len(self._queue))
+                    batch_data_size = 0
+                    batch = []
+
+                    for _ in range(batch_length):
+                        log_data: LogData = self._queue.pop()
+                        log_size = self._BASE_LOG_BUFFER_BYTE_SIZE + self._get_any_value_size(log_data.log_record.body)
+
+                        if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE):
+                            # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1
+                            if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE:
+                                if self._is_gen_ai_log(batch[0]):
+                                    self._exporter.set_gen_ai_log_flag()
+
+                            self._exporter.export(batch)
+                            batch_data_size = 0
+                            batch = []
+
+                        batch_data_size += log_size
+                        batch.append(log_data)
+
+                    if batch:
+                        # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1
+                        if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE:
+                            if self._is_gen_ai_log(batch[0]):
+                                self._exporter.set_gen_ai_log_flag()
+
+                        self._exporter.export(batch)
+                except Exception as e:  # pylint: disable=broad-exception-caught
+                    _logger.exception("Exception while exporting logs: " + str(e))
+                detach(token)
+
+    def _get_any_value_size(self, val: AnyValue, depth: int = 3) -> int:
+        """
+        Only used to indicate whether we should export a batch of size 1 or not.
+        Calculates the size in bytes of an AnyValue object.
+        Will process complex AnyValue structures up to the specified depth limit.
+        If the depth limit of the AnyValue structure is exceeded, returns 0.
+
+        Args:
+            val: The AnyValue object to calculate size for
+            depth: Maximum depth to traverse in nested structures (default: 3)
+
+        Returns:
+            int: Total size of the AnyValue object in bytes
+        """
+        # Use a stack to prevent excessive recursive calls.
+        stack = [(val, 0)]
+        size: int = 0
+
+        while stack:
+            # small optimization. We can stop calculating the size once it reaches the 1 MB limit.
+            if size >= self._MAX_LOG_REQUEST_BYTE_SIZE:
+                return size
+
+            next_val, current_depth = stack.pop()
+
+            if isinstance(next_val, (str, bytes)):
+                size += len(next_val)
+                continue
+
+            if isinstance(next_val, bool):
+                size += 4 if next_val else 5
+                continue
+
+            if isinstance(next_val, (float, int)):
+                size += len(str(next_val))
+                continue
+
+            if current_depth <= depth:
+                if isinstance(next_val, Sequence):
+                    for content in next_val:
+                        stack.append((cast(AnyValue, content), current_depth + 1))
+
+                if isinstance(next_val, Mapping):
+                    for key, content in next_val.items():
+                        size += len(key)
+                        stack.append((content, current_depth + 1))
+            else:
+                _logger.debug("Max log depth exceeded. Log data size will not be accurately calculated.")
+                return 0
+
+        return size
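+
+    # Worked example (illustrative only): for a body of
+    #   {"output": {"messages": [{"content": "hi", "role": "assistant"}]}}
+    # the loop above counts the keys "output" (6) and "messages" (8), and at
+    # depth 3 the keys "content" (7) and "role" (4) plus the string values
+    # "hi" (2) and "assistant" (9) -- 36 bytes in total. _export() then adds
+    # _BASE_LOG_BUFFER_BYTE_SIZE (2000) on top before comparing the running
+    # batch size against the 1 MB request limit.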
+ """ + gen_ai_instrumentations = { + "openinference.instrumentation.langchain", + "openinference.instrumentation.crewai", + "opentelemetry.instrumentation.langchain", + "crewai.telemetry", + "openlit.otel.tracing", + } + + return log_data.instrumentation_scope.name in gen_ai_instrumentations diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 048632c06..64203b434 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -1,14 +1,41 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, Optional +import gzip +import logging +from io import BytesIO +from time import sleep +from typing import Dict, Optional, Sequence + +import requests from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession +from opentelemetry.exporter.otlp.proto.common._internal import ( + _create_exp_backoff_generator, +) +from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs from opentelemetry.exporter.otlp.proto.http import Compression from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter +from opentelemetry.sdk._logs import ( + LogData, +) +from opentelemetry.sdk._logs.export import ( + LogExportResult, +) + +_logger = logging.getLogger(__name__) class OTLPAwsLogExporter(OTLPLogExporter): + _LARGE_LOG_HEADER = "x-aws-truncatable-fields" + _LARGE_GEN_AI_LOG_PATH_HEADER = ( + "\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" + "['kvlistValue']['values'][*]['value']['kvlistValue']['values'][*]" + "['value']['arrayValue']['values'][*]['kvlistValue']['values'][*]" + "['value']['stringValue']" + ) + _RETRY_AFTER_HEADER = "Retry-After" # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + def __init__( self, endpoint: Optional[str] = None, @@ -18,6 +45,7 @@ def __init__( headers: Optional[Dict[str, str]] = None, timeout: Optional[int] = None, ): + self._gen_ai_log_flag = False self._aws_region = None if endpoint: @@ -34,3 +62,134 @@ def __init__( compression=Compression.Gzip, session=AwsAuthSession(aws_region=self._aws_region, service="logs"), ) + + # https://github.com/open-telemetry/opentelemetry-python/blob/main/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 + def export(self, batch: Sequence[LogData]) -> LogExportResult: + """ + Exports the given batch of OTLP log data. + Behaviors of how this export will work - + + 1. Always compresses the serialized data into gzip before sending. + + 2. If self._gen_ai_log_flag is enabled, the log data is > 1 MB a + and the assumption is that the log is a normalized gen.ai LogEvent. + - inject the {LARGE_LOG_HEADER} into the header. + + 3. 
Retry behavior is now the following: + - if the response contains a status code that is retryable and the response contains Retry-After in its + headers, the serialized data will be exported after that set delay + + - if the response does not contain that Retry-After header, default back to the current iteration of the + exponential backoff delay + """ + + if self._shutdown: + _logger.warning("Exporter already shutdown, ignoring batch") + return LogExportResult.FAILURE + + serialized_data = encode_logs(batch).SerializeToString() + + gzip_data = BytesIO() + with gzip.GzipFile(fileobj=gzip_data, mode="w") as gzip_stream: + gzip_stream.write(serialized_data) + + data = gzip_data.getvalue() + + backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) + + while True: + resp = self._send(data) + + if resp.ok: + return LogExportResult.SUCCESS + + if not self._retryable(resp): + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) + self._gen_ai_log_flag = False + return LogExportResult.FAILURE + + # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + maybe_retry_after = resp.headers.get(self._RETRY_AFTER_HEADER, None) + + # Set the next retry delay to the value of the Retry-After response in the headers. + # If Retry-After is not present in the headers, default to the next iteration of the + # exponential backoff strategy. + + delay = self._parse_retryable_header(maybe_retry_after) + + if delay == -1: + delay = next(backoff, self._MAX_RETRY_TIMEOUT) + + if delay == self._MAX_RETRY_TIMEOUT: + _logger.error( + "Transient error %s encountered while exporting logs batch. " + "No Retry-After header found and all backoff retries exhausted. " + "Logs will not be exported.", + resp.reason, + ) + self._gen_ai_log_flag = False + return LogExportResult.FAILURE + + _logger.warning( + "Transient error %s encountered while exporting logs batch, retrying in %ss.", + resp.reason, + delay, + ) + + sleep(delay) + + def set_gen_ai_log_flag(self): + """ + Sets a flag that indicates the current log batch contains + a generative AI log record that exceeds the CloudWatch Logs size limit (1MB). + """ + self._gen_ai_log_flag = True + + def _send(self, serialized_data: bytes): + try: + response = self._session.post( + url=self._endpoint, + headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, + data=serialized_data, + verify=self._certificate_file, + timeout=self._timeout, + cert=self._client_cert, + ) + return response + except ConnectionError: + response = self._session.post( + url=self._endpoint, + headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, + data=serialized_data, + verify=self._certificate_file, + timeout=self._timeout, + cert=self._client_cert, + ) + return response + + @staticmethod + def _retryable(resp: requests.Response) -> bool: + """ + Is it a retryable response? 
+ """ + + return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) + + @staticmethod + def _parse_retryable_header(retry_header: Optional[str]) -> float: + """ + Converts the given retryable header into a delay in seconds, returns -1 if there's no header + or error with the parsing + """ + if not retry_header: + return -1 + + try: + val = float(retry_header) + return val if val >= 0 else -1 + except ValueError: + return -1 diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py new file mode 100644 index 000000000..e0c62b89d --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -0,0 +1,63 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from unittest import TestCase +from unittest.mock import patch + +import requests +from botocore.credentials import Credentials + +from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession + +AWS_OTLP_TRACES_ENDPOINT = "https://xray.us-east-1.amazonaws.com/v1/traces" +AWS_OTLP_LOGS_ENDPOINT = "https://logs.us-east-1.amazonaws.com/v1/logs" + +AUTHORIZATION_HEADER = "Authorization" +X_AMZ_DATE_HEADER = "X-Amz-Date" +X_AMZ_SECURITY_TOKEN_HEADER = "X-Amz-Security-Token" + +mock_credentials = Credentials(access_key="test_access_key", secret_key="test_secret_key", token="test_session_token") + + +class TestAwsAuthSession(TestCase): + @patch("pkg_resources.get_distribution", side_effect=ImportError("test error")) + @patch.dict("sys.modules", {"botocore": None}, clear=False) + @patch("requests.Session.request", return_value=requests.Response()) + def test_aws_auth_session_no_botocore(self, _, __): + """Tests that aws_auth_session will not inject SigV4 Headers if botocore is not installed.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) + self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=None) + def test_aws_auth_session_no_credentials(self, _, __): + """Tests that aws_auth_session will not inject SigV4 Headers if retrieving credentials returns None.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) + self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + def test_aws_auth_session(self, _, __): + """Tests that aws_auth_session will inject SigV4 Headers if botocore is installed.""" + + session = AwsAuthSession("us-east-1", "xray") + actual_headers = {"test": "test"} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertIn(AUTHORIZATION_HEADER, actual_headers) + 
self.assertIn(X_AMZ_DATE_HEADER, actual_headers) + self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py new file mode 100644 index 000000000..1abf680f1 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py @@ -0,0 +1,236 @@ +import time +import unittest +from typing import List +from unittest.mock import MagicMock, patch + +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsBatchLogRecordProcessor, + BatchLogExportStrategy, +) +from opentelemetry._logs.severity import SeverityNumber +from opentelemetry.sdk._logs import LogData, LogRecord +from opentelemetry.sdk._logs.export import LogExportResult +from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from opentelemetry.trace import TraceFlags +from opentelemetry.util.types import AnyValue + + +class TestAwsBatchLogRecordProcessor(unittest.TestCase): + + def setUp(self): + self.mock_exporter = MagicMock() + self.mock_exporter.export.return_value = LogExportResult.SUCCESS + + self.processor = AwsBatchLogRecordProcessor(exporter=self.mock_exporter) + + def test_process_log_data_nested_structure(self): + """Tests that the processor correctly handles nested structures (dict/list)""" + message_size = 400 + depth = 2 + + nested_dict_log_body = self.generate_nested_log_body( + depth=depth, expected_body="X" * message_size, create_map=True + ) + nested_array_log_body = self.generate_nested_log_body( + depth=depth, expected_body="X" * message_size, create_map=False + ) + + dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=depth) + array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=depth) + + # Asserting almost equal to account for key lengths in the Log object body + self.assertAlmostEqual(dict_size, message_size, delta=20) + self.assertAlmostEqual(array_size, message_size, delta=20) + + def test_process_log_data_nested_structure_exceeds_depth(self): + """Tests that the processor returns 0 for nested structure that exceeds the depth limit""" + message_size = 400 + log_body = "X" * message_size + + nested_dict_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=True) + nested_array_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=False) + + dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=3) + array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=3) + + self.assertEqual(dict_size, 0) + self.assertEqual(array_size, 0) + + def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): + """Tests that the processor returns prematurely if the size already exceeds _MAX_LOG_REQUEST_BYTE_SIZE""" + log_body = { + "smallKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE // 2), + "bigKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1), + } + + nested_dict_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=True) + nested_array_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=False) + + dict_size = self.processor._get_any_value_size(val=nested_dict_log_body) + array_size = 
self.processor._get_any_value_size(val=nested_array_log_body) + + self.assertAlmostEqual(dict_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) + self.assertAlmostEqual(array_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) + + def test_process_log_data_primitive(self): + + primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None] + expected_sizes = [4, 4, 1, 3, 4, 5, 0] + + for i in range(len(primitives)): + body = primitives[i] + expected_size = expected_sizes[i] + + actual_size = self.processor._get_any_value_size(body) + self.assertEqual(actual_size, expected_size) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + return_value=MagicMock(), + ) + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + def test_export_single_batch_under_size_limit(self, _, __, ___): + """Tests that export is only called once if a single batch is under the size limit""" + log_count = 10 + log_body = "test" + test_logs = self.generate_test_log_data(count=log_count, log_body=log_body) + total_data_size = 0 + + for log in test_logs: + size = self.processor._get_any_value_size(log.log_record.body) + total_data_size += size + self.processor._queue.appendleft(log) + + self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL) + args, _ = self.mock_exporter.export.call_args + actual_batch = args[0] + + self.assertLess(total_data_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE) + self.assertEqual(len(self.processor._queue), 0) + self.assertEqual(len(actual_batch), log_count) + self.mock_exporter.export.assert_called_once() + self.mock_exporter.set_gen_ai_log_flag.assert_not_called() + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + return_value=MagicMock(), + ) + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): + """Should make multiple export calls of batch size 1 to export logs of size > 1 MB. 

+        But should only call set_gen_ai_log_flag if it's a Gen AI log event."""
+
+        large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1)
+        non_gen_ai_test_logs = self.generate_test_log_data(count=3, log_body=large_log_body)
+        gen_ai_test_logs = []
+
+        gen_ai_scopes = [
+            "openinference.instrumentation.langchain",
+            "openinference.instrumentation.crewai",
+            "opentelemetry.instrumentation.langchain",
+            "crewai.telemetry",
+            "openlit.otel.tracing",
+        ]
+
+        for gen_ai_scope in gen_ai_scopes:
+            gen_ai_test_logs.extend(
+                self.generate_test_log_data(
+                    count=1, log_body=large_log_body, instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0")
+                )
+            )
+
+        test_logs = gen_ai_test_logs + non_gen_ai_test_logs
+
+        for log in test_logs:
+            self.processor._queue.appendleft(log)
+
+        self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL)
+
+        self.assertEqual(len(self.processor._queue), 0)
+        self.assertEqual(self.mock_exporter.export.call_count, 3 + len(gen_ai_test_logs))
+        self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, len(gen_ai_test_logs))
+
+        batches = self.mock_exporter.export.call_args_list
+
+        for batch in batches:
+            self.assertEqual(len(batch[0]), 1)
+
+    @patch(
+        "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach",
+        return_value=MagicMock(),
+    )
+    @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach")
+    @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value")
+    def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___):
+        """Should make calls to export smaller sub-batch logs"""
+        large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1)
+        gen_ai_scope = InstrumentationScope("openinference.instrumentation.langchain", "1.0.0")
+        small_log_body = "X" * (
+            int(self.processor._MAX_LOG_REQUEST_BYTE_SIZE / 10) - self.processor._BASE_LOG_BUFFER_BYTE_SIZE
+        )
+        test_logs = self.generate_test_log_data(count=3, log_body=large_log_body, instrumentation_scope=gen_ai_scope)
+        # 1st, 2nd, 3rd batch = size 1
+        # 4th batch = size 10
+        # 5th batch = size 2
+        small_logs = self.generate_test_log_data(count=12, log_body=small_log_body, instrumentation_scope=gen_ai_scope)
+
+        test_logs.extend(small_logs)
+
+        for log in test_logs:
+            self.processor._queue.appendleft(log)
+
+        self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL)
+
+        self.assertEqual(len(self.processor._queue), 0)
+        self.assertEqual(self.mock_exporter.export.call_count, 5)
+        self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, 3)
+
+        batches = self.mock_exporter.export.call_args_list
+
+        expected_sizes = {
+            0: 1,  # 1st batch (index 0) should have 1 log
+            1: 1,  # 2nd batch (index 1) should have 1 log
+            2: 1,  # 3rd batch (index 2) should have 1 log
+            3: 10,  # 4th batch (index 3) should have 10 logs
+            4: 2,  # 5th batch (index 4) should have 2 logs
+        }
+
+        for i, call in enumerate(batches):
+            batch = call[0][0]
+            expected_size = expected_sizes[i]
+            self.assertEqual(len(batch), expected_size)
+
+    def generate_test_log_data(
+        self, log_body: AnyValue, count=5, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")
+    ) -> List[LogData]:
+        logs = []
+        for i in range(count):
+            record = LogRecord(
+                timestamp=int(time.time_ns()),
+                trace_id=int(f"0x{i + 1:032x}", 16),
+                span_id=int(f"0x{i + 1:016x}", 16),
+                trace_flags=TraceFlags(1),
+                severity_text="INFO",
+                severity_number=SeverityNumber.INFO,
+                
body=log_body, + attributes={"test.attribute": f"value-{i + 1}"}, + ) + + log_data = LogData(log_record=record, instrumentation_scope=instrumentation_scope) + logs.append(log_data) + + return logs + + @staticmethod + def generate_nested_log_body(depth=0, expected_body: AnyValue = "test", create_map=True): + if depth < 0: + return expected_body + + if create_map: + return { + "key": TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map) + } + + return [TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map)] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py new file mode 100644 index 000000000..9f6d84b32 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py @@ -0,0 +1,180 @@ +import time +from unittest import TestCase +from unittest.mock import patch + +import requests +from requests.structures import CaseInsensitiveDict + +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter +from opentelemetry._logs.severity import SeverityNumber +from opentelemetry.sdk._logs import LogData, LogRecord +from opentelemetry.sdk._logs.export import ( + LogExportResult, +) +from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from opentelemetry.trace import TraceFlags + + +class TestOTLPAwsLogsExporter(TestCase): + _ENDPOINT = "https://logs.us-west-2.amazonaws.com/v1/logs" + good_response = requests.Response() + good_response.status_code = 200 + + non_retryable_response = requests.Response() + non_retryable_response.status_code = 404 + + retryable_response_no_header = requests.Response() + retryable_response_no_header.status_code = 429 + + retryable_response_header = requests.Response() + retryable_response_header.headers = CaseInsensitiveDict({"Retry-After": "10"}) + retryable_response_header.status_code = 503 + + retryable_response_bad_header = requests.Response() + retryable_response_bad_header.headers = CaseInsensitiveDict({"Retry-After": "-12"}) + retryable_response_bad_header.status_code = 503 + + def setUp(self): + self.logs = self.generate_test_log_data() + self.exporter = OTLPAwsLogExporter(endpoint=self._ENDPOINT) + + @patch("requests.Session.request", return_value=good_response) + def test_export_success(self, mock_request): + """Tests that the exporter always compresses the serialized logs with gzip before exporting.""" + result = self.exporter.export(self.logs) + + mock_request.assert_called_once() + + _, kwargs = mock_request.call_args + data = kwargs.get("data", None) + + self.assertEqual(result, LogExportResult.SUCCESS) + + # Gzip first 10 bytes are reserved for metadata headers: + # https://www.loc.gov/preservation/digital/formats/fdd/fdd000599.shtml?loclr=blogsig + self.assertIsNotNone(data) + self.assertTrue(len(data) >= 10) + self.assertEqual(data[0:2], b"\x1f\x8b") + + @patch("requests.Session.request", return_value=good_response) + def test_export_gen_ai_logs(self, mock_request): + """Tests that when set_gen_ai_log_flag is set, the exporter includes the LLO header in the request.""" + + self.exporter.set_gen_ai_log_flag() + + result = self.exporter.export(self.logs) + + mock_request.assert_called_once() + + _, kwargs = mock_request.call_args + headers = kwargs.get("headers", 
None) + + self.assertEqual(result, LogExportResult.SUCCESS) + self.assertIsNotNone(headers) + self.assertIn(self.exporter._LARGE_LOG_HEADER, headers) + self.assertEqual(headers[self.exporter._LARGE_LOG_HEADER], self.exporter._LARGE_GEN_AI_LOG_PATH_HEADER) + + @patch("requests.Session.request", return_value=good_response) + def test_should_not_export_if_shutdown(self, mock_request): + """Tests that no export request is made if the exporter is shutdown.""" + self.exporter.shutdown() + result = self.exporter.export(self.logs) + + mock_request.assert_not_called() + self.assertEqual(result, LogExportResult.FAILURE) + + @patch("requests.Session.request", return_value=non_retryable_response) + def test_should_not_export_again_if_not_retryable(self, mock_request): + """Tests that only one export request is made if the response status code is non-retryable.""" + result = self.exporter.export(self.logs) + mock_request.assert_called_once() + + self.assertEqual(result, LogExportResult.FAILURE) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None + ) + @patch("requests.Session.request", return_value=retryable_response_no_header) + def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_sleep): + """Tests that multiple export requests are made with exponential delay if the response status code is retryable. + But there is no Retry-After header.""" + result = self.exporter.export(self.logs) + + # 1, 2, 4, 8, 16, 32 delays + self.assertEqual(mock_sleep.call_count, 6) + + delays = mock_sleep.call_args_list + + for i in range(len(delays)): + self.assertEqual(delays[i][0][0], 2**i) + + # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays) + self.assertEqual(mock_request.call_count, 7) + self.assertEqual(result, LogExportResult.FAILURE) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None + ) + @patch( + "requests.Session.request", + side_effect=[retryable_response_header, retryable_response_header, retryable_response_header, good_response], + ) + def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_sleep): + """Tests that multiple export requests are made with the server's suggested + delay if the response status code is retryable and there is a Retry-After header.""" + result = self.exporter.export(self.logs) + delays = mock_sleep.call_args_list + + for i in range(len(delays)): + self.assertEqual(delays[i][0][0], 10) + + self.assertEqual(mock_sleep.call_count, 3) + self.assertEqual(mock_request.call_count, 4) + self.assertEqual(result, LogExportResult.SUCCESS) + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None + ) + @patch( + "requests.Session.request", + side_effect=[ + retryable_response_bad_header, + retryable_response_bad_header, + retryable_response_bad_header, + good_response, + ], + ) + def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after_header( + self, mock_request, mock_sleep + ): + """Tests that multiple export requests are made with exponential delay if the response status code is retryable. 

+        But the Retry-After header is invalid or malformed."""
+        result = self.exporter.export(self.logs)
+        delays = mock_sleep.call_args_list
+
+        for i in range(len(delays)):
+            self.assertEqual(delays[i][0][0], 2**i)
+
+        self.assertEqual(mock_sleep.call_count, 3)
+        self.assertEqual(mock_request.call_count, 4)
+        self.assertEqual(result, LogExportResult.SUCCESS)
+
+    def generate_test_log_data(self, count=5):
+        logs = []
+        for i in range(count):
+            record = LogRecord(
+                timestamp=int(time.time_ns()),
+                trace_id=int(f"0x{i + 1:032x}", 16),
+                span_id=int(f"0x{i + 1:016x}", 16),
+                trace_flags=TraceFlags(1),
+                severity_text="INFO",
+                severity_number=SeverityNumber.INFO,
+                body=f"Test log {i + 1}",
+                attributes={"test.attribute": f"value-{i + 1}"},
+            )
+
+            log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"))
+
+            logs.append(log_data)
+
+        return logs

From 010e7dfe9721f2ede2eadf05aa260d5af6c8afc1 Mon Sep 17 00:00:00 2001
From: liustve
Date: Fri, 20 Jun 2025 00:12:40 +0000
Subject: [PATCH 02/52] add logs pipeline

---
 aws-opentelemetry-distro/pyproject.toml       | 108 ++--
 .../distro/_aws_metric_attribute_generator.py |   2 +-
 .../distro/_aws_span_processing_util.py       |  10 -
 .../src/amazon/opentelemetry/distro/_utils.py |  20 +-
 .../distro/aws_opentelemetry_configurator.py  |   6 +-
 .../logs/aws_batch_log_record_processor.py    | 118 ++--
 .../otlp/aws/logs/otlp_aws_logs_exporter.py   |  53 +-
 .../metrics/aws_cloudwatch_emf_exporter.py    | 523 +++++++++++++++++
 .../distro/patches/_bedrock_patches.py        | 224 +------
 .../distro/patches/_botocore_patches.py       |  26 +-
 .../otlp/aws/common/test_aws_auth_session.py  |  23 +-
 ...=> test_aws_batch_log_record_processor.py} | 176 ++++--
 ...test.py => test_otlp_aws_logs_exporter.py} |  26 +-
 .../test_aws_cloudwatch_emf_exporter.py       | 547 ++++++++++++++++++
 .../traces}/test_otlp_aws_span_exporter.py    |   0
 .../distro/test_aws_auth_session.py           |  63 --
 .../test_aws_metric_attribute_generator.py    |   2 +-
 .../test_aws_opentelementry_configurator.py   |  17 +
 .../distro/test_aws_opentelemetry_distro.py   |   7 +-
 .../distro/test_instrumentation_patch.py      | 306 ++--------
 .../amazon/opentelemetry/distro/test_utils.py |  96 +++
 .../applications/botocore/botocore_server.py  |  28 +-
 .../applications/botocore/requirements.txt    |   2 -
 .../applications/django/requirements.txt      |   2 -
 .../mysql-connector/requirements.txt          |   2 -
 .../applications/mysqlclient/requirements.txt |   2 -
 .../applications/psycopg2/requirements.txt    |   2 -
 .../applications/pymysql/requirements.txt     |   2 -
 .../applications/requests/requirements.txt    |   2 -
 .../images/mock-collector/pyproject.toml      |   6 +-
 .../images/mock-collector/requirements.txt    |   6 +-
 contract-tests/tests/pyproject.toml           |   4 +-
 .../test/amazon/botocore/botocore_test.py     |  41 +-
 33 files changed, 1611 insertions(+), 841 deletions(-)
 create mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py
 rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/{aws_batch_log_record_processor_test.py => test_aws_batch_log_record_processor.py} (54%)
 rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/{otlp_aws_logs_exporter_test.py => test_otlp_aws_logs_exporter.py} (88%)
 create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py
 rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/{ => 
exporter/otlp/aws/traces}/test_otlp_aws_span_exporter.py (100%) delete mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py diff --git a/aws-opentelemetry-distro/pyproject.toml b/aws-opentelemetry-distro/pyproject.toml index 3d8eadbc1..f8984854d 100644 --- a/aws-opentelemetry-distro/pyproject.toml +++ b/aws-opentelemetry-distro/pyproject.toml @@ -24,62 +24,62 @@ classifiers = [ ] dependencies = [ - "opentelemetry-api == 1.27.0", - "opentelemetry-sdk == 1.27.0", - "opentelemetry-exporter-otlp-proto-grpc == 1.27.0", - "opentelemetry-exporter-otlp-proto-http == 1.27.0", - "opentelemetry-propagator-b3 == 1.27.0", - "opentelemetry-propagator-jaeger == 1.27.0", - "opentelemetry-exporter-otlp-proto-common == 1.27.0", + "opentelemetry-api == 1.33.1", + "opentelemetry-sdk == 1.33.1", + "opentelemetry-exporter-otlp-proto-grpc == 1.33.1", + "opentelemetry-exporter-otlp-proto-http == 1.33.1", + "opentelemetry-propagator-b3 == 1.33.1", + "opentelemetry-propagator-jaeger == 1.33.1", + "opentelemetry-exporter-otlp-proto-common == 1.33.1", "opentelemetry-sdk-extension-aws == 2.0.2", "opentelemetry-propagator-aws-xray == 1.0.1", - "opentelemetry-distro == 0.48b0", - "opentelemetry-processor-baggage == 0.48b0", - "opentelemetry-propagator-ot-trace == 0.48b0", - "opentelemetry-instrumentation == 0.48b0", - "opentelemetry-instrumentation-aws-lambda == 0.48b0", - "opentelemetry-instrumentation-aio-pika == 0.48b0", - "opentelemetry-instrumentation-aiohttp-client == 0.48b0", - "opentelemetry-instrumentation-aiopg == 0.48b0", - "opentelemetry-instrumentation-asgi == 0.48b0", - "opentelemetry-instrumentation-asyncpg == 0.48b0", - "opentelemetry-instrumentation-boto == 0.48b0", - "opentelemetry-instrumentation-boto3sqs == 0.48b0", - "opentelemetry-instrumentation-botocore == 0.48b0", - "opentelemetry-instrumentation-celery == 0.48b0", - "opentelemetry-instrumentation-confluent-kafka == 0.48b0", - "opentelemetry-instrumentation-dbapi == 0.48b0", - "opentelemetry-instrumentation-django == 0.48b0", - "opentelemetry-instrumentation-elasticsearch == 0.48b0", - "opentelemetry-instrumentation-falcon == 0.48b0", - "opentelemetry-instrumentation-fastapi == 0.48b0", - "opentelemetry-instrumentation-flask == 0.48b0", - "opentelemetry-instrumentation-grpc == 0.48b0", - "opentelemetry-instrumentation-httpx == 0.48b0", - "opentelemetry-instrumentation-jinja2 == 0.48b0", - "opentelemetry-instrumentation-kafka-python == 0.48b0", - "opentelemetry-instrumentation-logging == 0.48b0", - "opentelemetry-instrumentation-mysql == 0.48b0", - "opentelemetry-instrumentation-mysqlclient == 0.48b0", - "opentelemetry-instrumentation-pika == 0.48b0", - "opentelemetry-instrumentation-psycopg2 == 0.48b0", - "opentelemetry-instrumentation-pymemcache == 0.48b0", - "opentelemetry-instrumentation-pymongo == 0.48b0", - "opentelemetry-instrumentation-pymysql == 0.48b0", - "opentelemetry-instrumentation-pyramid == 0.48b0", - "opentelemetry-instrumentation-redis == 0.48b0", - "opentelemetry-instrumentation-remoulade == 0.48b0", - "opentelemetry-instrumentation-requests == 0.48b0", - "opentelemetry-instrumentation-sqlalchemy == 0.48b0", - "opentelemetry-instrumentation-sqlite3 == 0.48b0", - "opentelemetry-instrumentation-starlette == 0.48b0", - "opentelemetry-instrumentation-system-metrics == 0.48b0", - "opentelemetry-instrumentation-tornado == 0.48b0", - "opentelemetry-instrumentation-tortoiseorm == 0.48b0", - 
"opentelemetry-instrumentation-urllib == 0.48b0", - "opentelemetry-instrumentation-urllib3 == 0.48b0", - "opentelemetry-instrumentation-wsgi == 0.48b0", - "opentelemetry-instrumentation-cassandra == 0.48b0", + "opentelemetry-distro == 0.54b1", + "opentelemetry-processor-baggage == 0.54b1", + "opentelemetry-propagator-ot-trace == 0.54b1", + "opentelemetry-instrumentation == 0.54b1", + "opentelemetry-instrumentation-aws-lambda == 0.54b1", + "opentelemetry-instrumentation-aio-pika == 0.54b1", + "opentelemetry-instrumentation-aiohttp-client == 0.54b1", + "opentelemetry-instrumentation-aiopg == 0.54b1", + "opentelemetry-instrumentation-asgi == 0.54b1", + "opentelemetry-instrumentation-asyncpg == 0.54b1", + "opentelemetry-instrumentation-boto == 0.54b1", + "opentelemetry-instrumentation-boto3sqs == 0.54b1", + "opentelemetry-instrumentation-botocore == 0.54b1", + "opentelemetry-instrumentation-celery == 0.54b1", + "opentelemetry-instrumentation-confluent-kafka == 0.54b1", + "opentelemetry-instrumentation-dbapi == 0.54b1", + "opentelemetry-instrumentation-django == 0.54b1", + "opentelemetry-instrumentation-elasticsearch == 0.54b1", + "opentelemetry-instrumentation-falcon == 0.54b1", + "opentelemetry-instrumentation-fastapi == 0.54b1", + "opentelemetry-instrumentation-flask == 0.54b1", + "opentelemetry-instrumentation-grpc == 0.54b1", + "opentelemetry-instrumentation-httpx == 0.54b1", + "opentelemetry-instrumentation-jinja2 == 0.54b1", + "opentelemetry-instrumentation-kafka-python == 0.54b1", + "opentelemetry-instrumentation-logging == 0.54b1", + "opentelemetry-instrumentation-mysql == 0.54b1", + "opentelemetry-instrumentation-mysqlclient == 0.54b1", + "opentelemetry-instrumentation-pika == 0.54b1", + "opentelemetry-instrumentation-psycopg2 == 0.54b1", + "opentelemetry-instrumentation-pymemcache == 0.54b1", + "opentelemetry-instrumentation-pymongo == 0.54b1", + "opentelemetry-instrumentation-pymysql == 0.54b1", + "opentelemetry-instrumentation-pyramid == 0.54b1", + "opentelemetry-instrumentation-redis == 0.54b1", + "opentelemetry-instrumentation-remoulade == 0.54b1", + "opentelemetry-instrumentation-requests == 0.54b1", + "opentelemetry-instrumentation-sqlalchemy == 0.54b1", + "opentelemetry-instrumentation-sqlite3 == 0.54b1", + "opentelemetry-instrumentation-starlette == 0.54b1", + "opentelemetry-instrumentation-system-metrics == 0.54b1", + "opentelemetry-instrumentation-tornado == 0.54b1", + "opentelemetry-instrumentation-tortoiseorm == 0.54b1", + "opentelemetry-instrumentation-urllib == 0.54b1", + "opentelemetry-instrumentation-urllib3 == 0.54b1", + "opentelemetry-instrumentation-wsgi == 0.54b1", + "opentelemetry-instrumentation-cassandra == 0.54b1", ] [project.optional-dependencies] diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py index ec5b693ed..173f8492b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py @@ -35,7 +35,6 @@ ) from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute from amazon.opentelemetry.distro._aws_span_processing_util import ( - GEN_AI_REQUEST_MODEL, LOCAL_ROOT, MAX_KEYWORD_LENGTH, SQL_KEYWORD_PATTERN, @@ -60,6 +59,7 @@ from amazon.opentelemetry.distro.sqs_url_parser import SqsUrlParser from opentelemetry.sdk.resources import Resource 
from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_REQUEST_MODEL from opentelemetry.semconv.trace import SpanAttributes # Pertinent OTEL attribute keys diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py index 21e19afa9..d2a039861 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py @@ -26,16 +26,6 @@ # Max keyword length supported by parsing into remote_operation from DB_STATEMENT MAX_KEYWORD_LENGTH = 27 -# TODO: Use Semantic Conventions once upgrade to 0.47b0 -GEN_AI_REQUEST_MODEL: str = "gen_ai.request.model" -GEN_AI_SYSTEM: str = "gen_ai.system" -GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens" -GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature" -GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p" -GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons" -GEN_AI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens" -GEN_AI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens" - # Get dialect keywords retrieved from dialect_keywords.json file. # Only meant to be invoked by SQL_KEYWORD_PATTERN and unit tests diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index 149f9ad29..fa5acf42c 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 import os -import sys +from importlib.metadata import PackageNotFoundError, version from logging import Logger, getLogger -import pkg_resources +from packaging.requirements import Requirement _logger: Logger = getLogger(__name__) @@ -14,15 +14,21 @@ def is_installed(req: str) -> bool: """Is the given required package installed?""" - - if req in sys.modules and sys.modules[req] is not None: - return True + req = Requirement(req) try: - pkg_resources.get_distribution(req) - except Exception as exc: # pylint: disable=broad-except + dist_version = version(req.name) + except PackageNotFoundError as exc: _logger.debug("Skipping instrumentation patch: package %s, exception: %s", req, exc) return False + + if not list(req.specifier.filter([dist_version])): + _logger.debug( + "instrumentation for package %s is available but version %s is installed. Skipping.", + req, + dist_version, + ) + return False return True diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index b21bc6151..e39c916c5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -180,11 +180,11 @@ def _init_logging( resource: Resource = None, ): - # Provides a default OTLP log exporter when none is specified. + # Provides a default OTLP log exporter when it's not set. # This is the behavior for the logs exporters for other languages. 
- logs_exporter = os.environ.get("OTEL_LOGS_EXPORTER") + logs_exporters = os.environ.get("OTEL_LOGS_EXPORTER") - if not exporters and logs_exporter and (logs_exporter.lower() != "none"): + if not exporters and logs_exporters and logs_exporters.lower() != "none": exporters = {"otlp": OTLPLogExporter} provider = LoggerProvider(resource=resource) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 8feada9a0..e57b03f3f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -1,13 +1,11 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + import logging -from typing import Mapping, Optional, Sequence, cast +from typing import List, Mapping, Optional, Sequence, cast from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter -from opentelemetry.context import ( - _SUPPRESS_INSTRUMENTATION_KEY, - attach, - detach, - set_value, -) +from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value from opentelemetry.sdk._logs import LogData from opentelemetry.sdk._logs._internal.export import BatchLogExportStrategy from opentelemetry.sdk._logs.export import BatchLogRecordProcessor @@ -18,7 +16,7 @@ class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): _BASE_LOG_BUFFER_BYTE_SIZE = ( - 2000 # Buffer size in bytes to account for log metadata not included in the body size calculation + 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) _MAX_LOG_REQUEST_BYTE_SIZE = ( 1048576 # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html @@ -66,7 +64,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: for _ in range(batch_length): log_data: LogData = self._queue.pop() - log_size = self._BASE_LOG_BUFFER_BYTE_SIZE + self._get_any_value_size(log_data.log_record.body) + log_size = self._estimate_log_size(log_data) if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 @@ -88,64 +86,74 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: self._exporter.set_gen_ai_log_flag() self._exporter.export(batch) - except Exception as e: # pylint: disable=broad-exception-caught - _logger.exception("Exception while exporting logs: " + str(e)) + except Exception as exception: # pylint: disable=broad-exception-caught + _logger.exception("Exception while exporting logs: " + str(exception)) detach(token) - def _get_any_value_size(self, val: AnyValue, depth: int = 3) -> int: + def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: """ - Only used to indicate whether we should export a batch log size of 1 or not. - Calculates the size in bytes of an AnyValue object. - Will processs complex AnyValue structures up to the specified depth limit. - If the depth limit of the AnyValue structure is exceeded, returns 0. + Estimates the size in bytes of a log by calculating the size of its body and its attributes + and adding a buffer amount to account for other log metadata information. 

+        Will process complex log structures up to the specified depth limit.
+        If the depth limit of the log structure is exceeded, the calculation is
+        truncated at that point and the partial total is returned.
 
         Args:
-            val: The AnyValue object to calculate size for
+            log: The Log object to calculate size for
             depth: Maximum depth to traverse in nested structures (default: 3)
 
         Returns:
-            int: Total size of the AnyValue object in bytes
+            int: The estimated size of the log object in bytes
         """
-        # Use a stack to prevent excessive recursive calls.
-        stack = [(val, 0)]
-        size: int = 0
-
-        while stack:
-            # small optimization. We can stop calculating the size once it reaches the 1 MB limit.
-            if size >= self._MAX_LOG_REQUEST_BYTE_SIZE:
-                return size
-
-            next_val, current_depth = stack.pop()
-
-            if isinstance(next_val, (str, bytes)):
-                size += len(next_val)
-                continue
-
-            if isinstance(next_val, bool):
-                size += 4 if next_val else 5
-                continue
-
-            if isinstance(next_val, (float, int)):
-                size += len(str(next_val))
-                continue
-
-            if current_depth <= depth:
-                if isinstance(next_val, Sequence):
-                    for content in next_val:
-                        stack.append((cast(AnyValue, content), current_depth + 1))
-
-                if isinstance(next_val, Mapping):
-                    for key, content in next_val.items():
-                        size += len(key)
-                        stack.append((content, current_depth + 1))
-            else:
-                _logger.debug("Max log depth exceeded. Log data size will not be accurately calculated.")
-                return 0
+
+        # Use a queue to prevent excessive recursive calls.
+        # We calculate based on the size of the log record body and attributes for the log.
+        queue: List[tuple[AnyValue, int]] = [(log.log_record.body, 0), (log.log_record.attributes, -1)]
+
+        size: int = self._BASE_LOG_BUFFER_BYTE_SIZE
+
+        while queue:
+            new_queue: List[tuple[AnyValue, int]] = []
+
+            for data in queue:
+                # small optimization, can stop calculating the size once it reaches the 1 MB limit.
+                if size >= self._MAX_LOG_REQUEST_BYTE_SIZE:
+                    return size
+
+                next_val, current_depth = data
+
+                if isinstance(next_val, (str, bytes)):
+                    size += len(next_val)
+                    continue
+
+                if isinstance(next_val, bool):
+                    size += 4 if next_val else 5
+                    continue
+
+                if isinstance(next_val, (float, int)):
+                    size += len(str(next_val))
+                    continue
+
+                if current_depth <= depth:
+                    if isinstance(next_val, Sequence):
+                        for content in next_val:
+                            new_queue.append((cast(AnyValue, content), current_depth + 1))
+
+                    if isinstance(next_val, Mapping):
+                        for key, content in next_val.items():
+                            size += len(key)
+                            new_queue.append((content, current_depth + 1))
+                else:
+                    _logger.debug(
+                        f"Max log depth of {depth} exceeded. Log data size will not be accurately calculated."
+                    )
+
+            queue = new_queue
 
         return size
 
     @staticmethod
-    def _is_gen_ai_log(log_data: LogData) -> bool:
+    def _is_gen_ai_log(log: LogData) -> bool:
         """
         Is the log a Gen AI log event? 
""" @@ -157,4 +165,4 @@ def _is_gen_ai_log(log_data: LogData) -> bool: "openlit.otel.tracing", } - return log_data.instrumentation_scope.name in gen_ai_instrumentations + return log.instrumentation_scope.name in gen_ai_instrumentations diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 64203b434..9bd75d03f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -10,30 +10,53 @@ import requests from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession -from opentelemetry.exporter.otlp.proto.common._internal import ( - _create_exp_backoff_generator, -) +from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs from opentelemetry.exporter.otlp.proto.http import Compression from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter -from opentelemetry.sdk._logs import ( - LogData, -) -from opentelemetry.sdk._logs.export import ( - LogExportResult, -) +from opentelemetry.sdk._logs import LogData +from opentelemetry.sdk._logs.export import LogExportResult _logger = logging.getLogger(__name__) class OTLPAwsLogExporter(OTLPLogExporter): - _LARGE_LOG_HEADER = "x-aws-truncatable-fields" + """ + Below is the protobuf-JSON formatted path to "content" and "role" for the + following GenAI Consolidated Log Event Schema: + + "body": { + "output": { + "messages": [ + { + "content": "hi", + "role": "assistant" + } + ] + }, + "input": { + "messages": [ + { + "content": "hello", + "role": "user" + } + ] + } + } + + """ + _LARGE_GEN_AI_LOG_PATH_HEADER = ( - "\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" - "['kvlistValue']['values'][*]['value']['kvlistValue']['values'][*]" - "['value']['arrayValue']['values'][*]['kvlistValue']['values'][*]" - "['value']['stringValue']" + "\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" # body + "['kvlistValue']['values'][*]['value']" # body['output'], body['input'] + "['kvlistValue']['values'][0]['value']" # body['output']['messages'], body['input']['messages'] + "['arrayValue']['values'][*]" # body['output']['messages'][0..999], body['input']['messages'][0..999] + "['kvlistValue']['values'][*]['value']['stringValue']" # body['output']['messages'][0..999]['content'/'role'], + # body['input']['messages'][0..999]['content'/'role'] ) + + _LARGE_LOG_HEADER = "x-aws-truncatable-fields" + _RETRY_AFTER_HEADER = "Retry-After" # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling def __init__( @@ -160,7 +183,7 @@ def _send(self, serialized_data: bytes): cert=self._client_cert, ) return response - except ConnectionError: + except requests.exceptions.ConnectionError: response = self._session.post( url=self._endpoint, headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py new file mode 100644 index 000000000..e2e364b03 --- /dev/null 
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py
@@ -0,0 +1,523 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0

+# pylint: disable=no-self-use

+import json
+import logging
+import time
+import uuid
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Tuple

+import botocore.session
+from botocore.exceptions import ClientError

+from opentelemetry.sdk.metrics import (
+    Counter,
+    Histogram,
+    ObservableCounter,
+    ObservableGauge,
+    ObservableUpDownCounter,
+    UpDownCounter,
+)
+from opentelemetry.sdk.metrics._internal.point import Metric
+from opentelemetry.sdk.metrics.export import (
+    AggregationTemporality,
+    Gauge,
+    MetricExporter,
+    MetricExportResult,
+    MetricsData,
+    NumberDataPoint,
+)
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.util.types import Attributes

+logger = logging.getLogger(__name__)


+class MetricRecord:
+    """Unified intermediate representation of an OTel metric, used for OTel to CloudWatch EMF conversion."""

+    def __init__(self, metric_name: str, metric_unit: str, metric_description: str):
+        """
+        Initialize metric record.

+        Args:
+            metric_name: Name of the metric
+            metric_unit: Unit of the metric
+            metric_description: Description of the metric
+        """
+        # Instrument metadata
+        self.name = metric_name
+        self.unit = metric_unit
+        self.description = metric_description

+        # Will be set by conversion methods
+        self.timestamp: Optional[int] = None
+        self.attributes: Attributes = {}

+        # Different metric type data - only one will be set per record
+        self.value: Optional[float] = None
+        self.sum_data: Optional[Any] = None
+        self.histogram_data: Optional[Any] = None
+        self.exp_histogram_data: Optional[Any] = None


+class AwsCloudWatchEmfExporter(MetricExporter):
+    """
+    OpenTelemetry metrics exporter for CloudWatch EMF format.

+    This exporter converts OTel metrics into CloudWatch EMF logs which are then
+    sent to CloudWatch Logs. CloudWatch Logs automatically extracts the metrics
+    from the EMF logs.

+    https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html

+    """

+    # CloudWatch EMF supported units
+    # Ref: https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html
+    EMF_SUPPORTED_UNITS = {
+        "Seconds",
+        "Microseconds",
+        "Milliseconds",
+        "Bytes",
+        "Kilobytes",
+        "Megabytes",
+        "Gigabytes",
+        "Terabytes",
+        "Bits",
+        "Kilobits",
+        "Megabits",
+        "Gigabits",
+        "Terabits",
+        "Percent",
+        "Count",
+        "Bytes/Second",
+        "Kilobytes/Second",
+        "Megabytes/Second",
+        "Gigabytes/Second",
+        "Terabytes/Second",
+        "Bits/Second",
+        "Kilobits/Second",
+        "Megabits/Second",
+        "Gigabits/Second",
+        "Terabits/Second",
+        "Count/Second",
+        "None",
+    }

+    # OTel to CloudWatch unit mapping
+    # Ref: opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/grouped_metric.go#L188
+    UNIT_MAPPING = {
+        "1": "",
+        "ns": "",
+        "ms": "Milliseconds",
+        "s": "Seconds",
+        "us": "Microseconds",
+        "By": "Bytes",
+        "bit": "Bits",
+    }

+    def __init__(
+        self,
+        namespace: str = "default",
+        log_group_name: Optional[str] = None,
+        log_stream_name: Optional[str] = None,
+        aws_region: Optional[str] = None,
+        preferred_temporality: Optional[Dict[type, AggregationTemporality]] = None,
+        **kwargs,
+    ):
+        """
+        Initialize the CloudWatch EMF exporter.
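+
+        Illustrative usage (a sketch, not part of this module; assumes botocore can
+        resolve AWS credentials, and uses the standard OTel SDK classes MeterProvider
+        from opentelemetry.sdk.metrics and PeriodicExportingMetricReader from
+        opentelemetry.sdk.metrics.export; the namespace and log group names are examples):
+
+            exporter = AwsCloudWatchEmfExporter(namespace="MyApp", log_group_name="/metrics/my-app")
+            reader = PeriodicExportingMetricReader(exporter)
+            provider = MeterProvider(metric_readers=[reader])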
+
+        Args:
+            namespace: CloudWatch namespace for metrics
+            log_group_name: CloudWatch log group name
+            log_stream_name: CloudWatch log stream name (auto-generated if None)
+            aws_region: AWS region (auto-detected if None)
+            preferred_temporality: Optional dictionary mapping instrument types to aggregation temporality
+            **kwargs: Additional arguments passed to botocore client
+        """
+        # Default the temporality preference to DELTA if the caller does not set one
+        if preferred_temporality is None:
+            preferred_temporality = {
+                Counter: AggregationTemporality.DELTA,
+                Histogram: AggregationTemporality.DELTA,
+                ObservableCounter: AggregationTemporality.DELTA,
+                ObservableGauge: AggregationTemporality.DELTA,
+                ObservableUpDownCounter: AggregationTemporality.DELTA,
+                UpDownCounter: AggregationTemporality.DELTA,
+            }

+        super().__init__(preferred_temporality)

+        self.namespace = namespace
+        self.log_group_name = log_group_name
+        self.log_stream_name = log_stream_name or self._generate_log_stream_name()

+        session = botocore.session.Session()
+        self.logs_client = session.create_client("logs", region_name=aws_region, **kwargs)

+        # Ensure log group exists
+        self._ensure_log_group_exists()

+        # Ensure log stream exists
+        self._ensure_log_stream_exists()

+    # Default to a unique log stream name, matching the OTel Collector EMF exporter's
+    # behavior, with the language included for source identification
+    def _generate_log_stream_name(self) -> str:
+        """Generate a unique log stream name."""

+        unique_id = str(uuid.uuid4())[:8]
+        return f"otel-python-{unique_id}"

+    def _ensure_log_group_exists(self):
+        """Ensure the log group exists, create if it doesn't."""
+        try:
+            self.logs_client.create_log_group(logGroupName=self.log_group_name)
+            logger.info("Created log group: %s", self.log_group_name)
+        except ClientError as error:
+            if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException":
+                logger.debug("Log group %s already exists", self.log_group_name)
+            else:
+                logger.error("Failed to create log group %s: %s", self.log_group_name, error)
+                raise

+    def _ensure_log_stream_exists(self):
+        """Ensure the log stream exists, create if it doesn't."""
+        try:
+            self.logs_client.create_log_stream(logGroupName=self.log_group_name, logStreamName=self.log_stream_name)
+            logger.info("Created log stream: %s", self.log_stream_name)
+        except ClientError as error:
+            if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException":
+                logger.debug("Log stream %s already exists", self.log_stream_name)
+            else:
+                logger.error("Failed to create log stream %s: %s", self.log_stream_name, error)
+                raise

+    def _get_metric_name(self, record: MetricRecord) -> Optional[str]:
+        """Get the metric name from the metric record or data point."""

+        try:
+            if record.name:
+                return record.name
+        except AttributeError:
+            pass
+        # Return None if no valid metric name found
+        return None

+    def _get_unit(self, record: MetricRecord) -> Optional[str]:
+        """Get CloudWatch unit from MetricRecord unit."""
+        unit = record.unit

+        if not unit:
+            return None

+        # First check if unit is already a supported EMF unit
+        if unit in self.EMF_SUPPORTED_UNITS:
+            return unit

+        # Map from OTel unit to CloudWatch unit
+        mapped_unit = self.UNIT_MAPPING.get(unit)

+        return mapped_unit

+    def _get_dimension_names(self, attributes: Attributes) -> List[str]:
+        """Extract dimension names from attributes."""
+        # Implement dimension selection logic
+        # For now, use all attributes as dimensions
+        return list(attributes.keys())

+    def _get_attributes_key(self, attributes: Attributes) -> str:
+        """
+
Create a hashable key from attributes for grouping metrics. + + Args: + attributes: The attributes dictionary + + Returns: + A string representation of sorted attributes key-value pairs + """ + # Sort the attributes to ensure consistent keys + sorted_attrs = sorted(attributes.items()) + # Create a string representation of the attributes + return str(sorted_attrs) + + def _normalize_timestamp(self, timestamp_ns: int) -> int: + """ + Normalize a nanosecond timestamp to milliseconds for CloudWatch. + + Args: + timestamp_ns: Timestamp in nanoseconds + + Returns: + Timestamp in milliseconds + """ + # Convert from nanoseconds to milliseconds + return timestamp_ns // 1_000_000 + + def _create_metric_record(self, metric_name: str, metric_unit: str, metric_description: str) -> MetricRecord: + """ + Creates the intermediate metric data structure that standardizes different otel metric representation + and will be used to generate EMF events. The base record + establishes the instrument schema (name/unit/description) that will be populated + with dimensions, timestamps, and values during metric processing. + + Args: + metric_name: Name of the metric + metric_unit: Unit of the metric + metric_description: Description of the metric + + Returns: + A MetricRecord object + """ + return MetricRecord(metric_name, metric_unit, metric_description) + + def _convert_gauge(self, metric: Metric, data_point: NumberDataPoint) -> MetricRecord: + """Convert a Gauge metric datapoint to a metric record. + + Args: + metric: The metric object + data_point: The datapoint to convert + + Returns: + MetricRecord with populated timestamp, attributes, and value + """ + # Create base record + record = self._create_metric_record(metric.name, metric.unit, metric.description) + + # Set timestamp + try: + timestamp_ms = ( + self._normalize_timestamp(data_point.time_unix_nano) + if data_point.time_unix_nano is not None + else int(time.time() * 1000) + ) + except AttributeError: + # data_point doesn't have time_unix_nano attribute + timestamp_ms = int(time.time() * 1000) + record.timestamp = timestamp_ms + + # Set attributes + try: + record.attributes = data_point.attributes + except AttributeError: + # data_point doesn't have attributes + record.attributes = {} + + # For Gauge, set the value directly + try: + record.value = data_point.value + except AttributeError: + # data_point doesn't have value + record.value = None + + return record + + def _group_by_attributes_and_timestamp(self, record: MetricRecord) -> Tuple[str, int]: + """Group metric record by attributes and timestamp. + + Args: + record: The metric record + + Returns: + A tuple key for grouping + """ + # Create a key for grouping based on attributes + attrs_key = self._get_attributes_key(record.attributes) + return (attrs_key, record.timestamp) + + def _create_emf_log( + self, metric_records: List[MetricRecord], resource: Resource, timestamp: Optional[int] = None + ) -> Dict: + """ + Create EMF log dictionary from metric records. + + Since metric_records is already grouped by attributes, this function + creates a single EMF log for all records. 
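+
+        An abbreviated sketch of the produced EMF document (values are illustrative):
+
+            {
+                "_aws": {"Timestamp": 1234567890, "CloudWatchMetrics": [...]},
+                "Version": "1",
+                "otel.resource.service.name": "my-service",
+                "env": "prod",
+                "gauge_metric": 50.0
+            }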
+        """
+        # Start with base structure
+        emf_log = {"_aws": {"Timestamp": timestamp or int(time.time() * 1000), "CloudWatchMetrics": []}}

+        # Set with latest EMF version schema
+        # opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/metric_translator.go#L414
+        emf_log["Version"] = "1"

+        # Add resource attributes to EMF log but not as dimensions
+        # OTel collector EMF Exporter has a resource_to_telemetry_conversion flag that will convert resource attributes
+        # as regular metric attributes (potential dimensions). However, for this SDK EMF implementation,
+        # we align with the OpenTelemetry concept that all metric attributes are treated as dimensions.
+        # Resource attributes are kept as additional metadata in EMF, prefixed with "otel.resource." to distinguish them.
+        if resource and resource.attributes:
+            for key, value in resource.attributes.items():
+                emf_log[f"otel.resource.{key}"] = str(value)

+        # Initialize collections for dimensions and metrics
+        metric_definitions = []
+        # Collect attributes from all records (they should be the same for all records in the group)
+        # Only collect once from the first record and apply to all records
+        all_attributes = metric_records[0].attributes if metric_records and metric_records[0].attributes else {}

+        # Process each metric record
+        for record in metric_records:

+            metric_name = self._get_metric_name(record)

+            # Skip processing if metric name is None or empty
+            if not metric_name:
+                continue

+            # Skip processing if the metric does not have a value
+            if record.value is None:
+                logger.debug("Skipping metric %s as it does not have valid metric value", metric_name)
+                continue

+            # Create metric data dict
+            metric_data = {"Name": metric_name}

+            unit = self._get_unit(record)
+            if unit:
+                metric_data["Unit"] = unit

+            # Add to metric definitions list
+            metric_definitions.append(metric_data)

+            emf_log[metric_name] = record.value

+        # Get dimension names from collected attributes
+        dimension_names = self._get_dimension_names(all_attributes)

+        # Add attribute values to the root of the EMF log
+        for name, value in all_attributes.items():
+            emf_log[name] = str(value)

+        # Add the single dimension set to CloudWatch Metrics if we have dimensions and metrics
+        if dimension_names and metric_definitions:
+            emf_log["_aws"]["CloudWatchMetrics"].append(
+                {"Namespace": self.namespace, "Dimensions": [dimension_names], "Metrics": metric_definitions}
+            )

+        return emf_log

+    # pylint: disable=no-member
+    def _send_log_event(self, log_event: Dict[str, Any]):
+        """
+        Send a log event to CloudWatch Logs.

+        Basic implementation for PR 1 - sends individual events directly.

+        TODO: Batch events and honor CloudWatch Logs quota constraints - number of events and size limit per payload
+        """
+        try:
+            # Send the log event
+            response = self.logs_client.put_log_events(
+                logGroupName=self.log_group_name, logStreamName=self.log_stream_name, logEvents=[log_event]
+            )

+            logger.debug("Successfully sent log event")
+            return response

+        except ClientError as error:
+            logger.debug("Failed to send log event: %s", error)
+            raise

+    # pylint: disable=too-many-nested-blocks
+    def export(
+        self, metrics_data: MetricsData, timeout_millis: Optional[int] = None, **kwargs: Any
+    ) -> MetricExportResult:
+        """
+        Export metrics as EMF logs to CloudWatch.

+        Groups metrics by attributes and timestamp before creating EMF logs.
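+        For example, two gauge data points that share the attribute set {"env": "prod"}
+        and normalize to the same millisecond timestamp are grouped together and
+        emitted as a single EMF log event.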
+
+        Args:
+            metrics_data: MetricsData containing resource metrics and scope metrics
+            timeout_millis: Optional timeout in milliseconds
+            **kwargs: Additional keyword arguments

+        Returns:
+            MetricExportResult indicating success or failure
+        """
+        try:
+            if not metrics_data.resource_metrics:
+                return MetricExportResult.SUCCESS

+            # Process all metrics from all resource metrics and scope metrics
+            for resource_metrics in metrics_data.resource_metrics:
+                for scope_metrics in resource_metrics.scope_metrics:
+                    # Dictionary to group metrics by attributes and timestamp
+                    grouped_metrics = defaultdict(list)

+                    # Process all metrics in this scope
+                    for metric in scope_metrics.metrics:
+                        # Skip if metric.data is None or no data_points exist
+                        try:
+                            if not (metric.data and metric.data.data_points):
+                                continue
+                        except AttributeError:
+                            # Metric doesn't have data or data_points attribute
+                            continue

+                        # Process metrics based on type
+                        metric_type = type(metric.data)
+                        if metric_type == Gauge:
+                            for dp in metric.data.data_points:
+                                record = self._convert_gauge(metric, dp)
+                                grouped_metrics[self._group_by_attributes_and_timestamp(record)].append(record)
+                        else:
+                            logger.debug("Unsupported Metric Type: %s", metric_type)

+                    # Now process each group separately to create one EMF log per group
+                    for (_, timestamp_ms), metric_records in grouped_metrics.items():
+                        if not metric_records:
+                            continue

+                        # Create and send EMF log for this batch of metrics
+                        self._send_log_event(
+                            {
+                                "message": json.dumps(
+                                    self._create_emf_log(metric_records, resource_metrics.resource, timestamp_ms)
+                                ),
+                                "timestamp": timestamp_ms,
+                            }
+                        )

+            return MetricExportResult.SUCCESS
+        # pylint: disable=broad-exception-caught
+        # Catch all exception types so the instrumented service is not interrupted
+        except Exception as error:
+            logger.error("Failed to export metrics: %s", error)
+            return MetricExportResult.FAILURE

+    def force_flush(self, timeout_millis: int = 10000) -> bool:
+        """
+        Force flush any pending metrics.

+        TODO: add logic to handle graceful shutdown

+        Args:
+            timeout_millis: Timeout in milliseconds

+        Returns:
+            True if successful, False otherwise
+        """
+        logger.debug("AwsCloudWatchEmfExporter force flushes the buffered metrics")
+        return True

+    def shutdown(self, timeout_millis: Optional[int] = None, **kwargs: Any) -> bool:
+        """
+        Shutdown the exporter.
+        Override to handle timeout and other keyword arguments, but do nothing.
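+        Delegates to force_flush() so that anything buffered by a future
+        implementation is flushed before shutdown completes.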
+
+        TODO: add logic to handle graceful shutdown

+        Args:
+            timeout_millis: Ignored timeout in milliseconds
+            **kwargs: Ignored additional keyword arguments
+        """
+        # No other cleanup to perform beyond flushing
+        self.force_flush(timeout_millis)
+        logger.debug("AwsCloudWatchEmfExporter shutdown called with timeout_millis=%s", timeout_millis)
+        return True
diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
index a25e55330..549154771 100644
--- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
@@ -2,13 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 import abc
 import inspect
-import io
-import json
 import logging
-import math
-from typing import Any, Dict, Optional
-
-from botocore.response import StreamingBody
+from typing import Dict, Optional
 
 from amazon.opentelemetry.distro._aws_attribute_keys import (
     AWS_BEDROCK_AGENT_ID,
@@ -17,20 +12,11 @@
     AWS_BEDROCK_GUARDRAIL_ID,
     AWS_BEDROCK_KNOWLEDGE_BASE_ID,
 )
-from amazon.opentelemetry.distro._aws_span_processing_util import (
-    GEN_AI_REQUEST_MAX_TOKENS,
-    GEN_AI_REQUEST_MODEL,
-    GEN_AI_REQUEST_TEMPERATURE,
-    GEN_AI_REQUEST_TOP_P,
-    GEN_AI_RESPONSE_FINISH_REASONS,
-    GEN_AI_SYSTEM,
-    GEN_AI_USAGE_INPUT_TOKENS,
-    GEN_AI_USAGE_OUTPUT_TOKENS,
-)
 from opentelemetry.instrumentation.botocore.extensions.types import (
     _AttributeMapT,
     _AwsSdkCallContext,
     _AwsSdkExtension,
+    _BotocoreInstrumentorContext,
     _BotoResultT,
 )
 from opentelemetry.trace.span import Span
@@ -192,7 +178,7 @@ def extract_attributes(self, attributes: _AttributeMapT):
         if request_param_value:
             attributes[attribute_key] = request_param_value
 
-    def on_success(self, span: Span, result: _BotoResultT):
+    def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext):
         if self._operation_class is None:
             return
 
@@ -229,7 +215,7 @@ class _BedrockExtension(_AwsSdkExtension):
     """
 
     # pylint: disable=no-self-use
-    def on_success(self, span: Span, result: _BotoResultT):
+    def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext):
         # _GUARDRAIL_ID can only be retrieved from the response, not from the request
         guardrail_id = result.get(_GUARDRAIL_ID)
         if guardrail_id:
@@ -244,205 +230,3 @@ def on_success(self, span: Span, result: _BotoResultT):
                 AWS_BEDROCK_GUARDRAIL_ARN,
                 guardrail_arn,
             )
-
-
-class _BedrockRuntimeExtension(_AwsSdkExtension):
-    """
-    This class is an extension for
-    Amazon Bedrock Runtime.
- """ - - def extract_attributes(self, attributes: _AttributeMapT): - attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM - - model_id = self._call_context.params.get(_MODEL_ID) - if model_id: - attributes[GEN_AI_REQUEST_MODEL] = model_id - - # Get the request body if it exists - body = self._call_context.params.get("body") - if body: - try: - request_body = json.loads(body) - - if "amazon.titan" in model_id: - self._extract_titan_attributes(attributes, request_body) - if "amazon.nova" in model_id: - self._extract_nova_attributes(attributes, request_body) - elif "anthropic.claude" in model_id: - self._extract_claude_attributes(attributes, request_body) - elif "meta.llama" in model_id: - self._extract_llama_attributes(attributes, request_body) - elif "cohere.command" in model_id: - self._extract_cohere_attributes(attributes, request_body) - elif "ai21.jamba" in model_id: - self._extract_ai21_attributes(attributes, request_body) - elif "mistral" in model_id: - self._extract_mistral_attributes(attributes, request_body) - - except json.JSONDecodeError: - _logger.debug("Error: Unable to parse the body as JSON") - - def _extract_titan_attributes(self, attributes, request_body): - config = request_body.get("textGenerationConfig", {}) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount")) - - def _extract_nova_attributes(self, attributes, request_body): - config = request_body.get("inferenceConfig", {}) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens")) - - def _extract_claude_attributes(self, attributes, request_body): - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - def _extract_cohere_attributes(self, attributes, request_body): - prompt = request_body.get("message") - if prompt: - attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p")) - - def _extract_ai21_attributes(self, attributes, request_body): - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - def _extract_llama_attributes(self, attributes, request_body): - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - def _extract_mistral_attributes(self, attributes, request_body): - prompt = request_body.get("prompt") - if prompt: - attributes[GEN_AI_USAGE_INPUT_TOKENS] = 
math.ceil(len(prompt) / 6) - self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")) - self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")) - - @staticmethod - def _set_if_not_none(attributes, key, value): - if value is not None: - attributes[key] = value - - # pylint: disable=too-many-branches - def on_success(self, span: Span, result: Dict[str, Any]): - model_id = self._call_context.params.get(_MODEL_ID) - - if not model_id: - return - - if "body" in result and isinstance(result["body"], StreamingBody): - original_body = None - try: - original_body = result["body"] - body_content = original_body.read() - - # Use one stream for telemetry - stream = io.BytesIO(body_content) - telemetry_content = stream.read() - response_body = json.loads(telemetry_content.decode("utf-8")) - if "amazon.titan" in model_id: - self._handle_amazon_titan_response(span, response_body) - if "amazon.nova" in model_id: - self._handle_amazon_nova_response(span, response_body) - elif "anthropic.claude" in model_id: - self._handle_anthropic_claude_response(span, response_body) - elif "meta.llama" in model_id: - self._handle_meta_llama_response(span, response_body) - elif "cohere.command" in model_id: - self._handle_cohere_command_response(span, response_body) - elif "ai21.jamba" in model_id: - self._handle_ai21_jamba_response(span, response_body) - elif "mistral" in model_id: - self._handle_mistral_mistral_response(span, response_body) - # Replenish stream for downstream application use - new_stream = io.BytesIO(body_content) - result["body"] = StreamingBody(new_stream, len(body_content)) - - except json.JSONDecodeError: - _logger.debug("Error: Unable to parse the response body as JSON") - except Exception as e: # pylint: disable=broad-exception-caught, invalid-name - _logger.debug("Error processing response: %s", e) - finally: - if original_body is not None: - original_body.close() - - # pylint: disable=no-self-use - def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]): - if "inputTextTokenCount" in response_body: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"]) - if "results" in response_body and response_body["results"]: - result = response_body["results"][0] - if "tokenCount" in result: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"]) - if "completionReason" in result: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]]) - - # pylint: disable=no-self-use - def _handle_amazon_nova_response(self, span: Span, response_body: Dict[str, Any]): - if "usage" in response_body: - usage = response_body["usage"] - if "inputTokens" in usage: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"]) - if "outputTokens" in usage: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"]) - if "stopReason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]]) - - # pylint: disable=no-self-use - def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]): - if "usage" in response_body: - usage = response_body["usage"] - if "input_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"]) - if "output_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"]) - if "stop_reason" in response_body: 
- span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]) - - # pylint: disable=no-self-use - def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]): - # Output tokens: Approximate from the response text - if "text" in response_body: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6)) - if "finish_reason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]]) - - # pylint: disable=no-self-use - def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]): - if "usage" in response_body: - usage = response_body["usage"] - if "prompt_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"]) - if "completion_tokens" in usage: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"]) - if "choices" in response_body: - choices = response_body["choices"][0] - if "finish_reason" in choices: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]]) - - # pylint: disable=no-self-use - def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]): - if "prompt_token_count" in response_body: - span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"]) - if "generation_token_count" in response_body: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"]) - if "stop_reason" in response_body: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]) - - # pylint: disable=no-self-use - def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]): - if "outputs" in response_body: - outputs = response_body["outputs"][0] - if "text" in outputs: - span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6)) - if "stop_reason" in outputs: - span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]]) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py index 0f4a77d1e..ffc81b348 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py @@ -19,13 +19,17 @@ _BedrockAgentExtension, _BedrockAgentRuntimeExtension, _BedrockExtension, - _BedrockRuntimeExtension, ) from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS from opentelemetry.instrumentation.botocore.extensions.lmbd import _LambdaExtension from opentelemetry.instrumentation.botocore.extensions.sns import _SnsExtension from opentelemetry.instrumentation.botocore.extensions.sqs import _SqsExtension -from opentelemetry.instrumentation.botocore.extensions.types import _AttributeMapT, _AwsSdkExtension, _BotoResultT +from opentelemetry.instrumentation.botocore.extensions.types import ( + _AttributeMapT, + _AwsSdkExtension, + _BotocoreInstrumentorContext, + _BotoResultT, +) from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.span import Span @@ -75,8 +79,8 @@ def patch_extract_attributes(self, attributes: _AttributeMapT): old_on_success = _LambdaExtension.on_success - def patch_on_success(self, span: Span, result: _BotoResultT): - old_on_success(self, span, result) + def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_context: 
_BotocoreInstrumentorContext): + old_on_success(self, span, result, instrumentor_context) lambda_configuration = result.get("Configuration", {}) function_arn = lambda_configuration.get("FunctionArn") if function_arn: @@ -180,8 +184,8 @@ def patch_extract_attributes(self, attributes: _AttributeMapT): old_on_success = _SqsExtension.on_success - def patch_on_success(self, span: Span, result: _BotoResultT): - old_on_success(self, span, result) + def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): + old_on_success(self, span, result, instrumentor_context) queue_url = result.get("QueueUrl") if queue_url: span.set_attribute(AWS_SQS_QUEUE_URL, queue_url) @@ -191,17 +195,17 @@ def patch_on_success(self, span: Span, result: _BotoResultT): def _apply_botocore_bedrock_patch() -> None: - """Botocore instrumentation patch for Bedrock, Bedrock Agent, Bedrock Runtime and Bedrock Agent Runtime + """Botocore instrumentation patch for Bedrock, Bedrock Agent, and Bedrock Agent Runtime This patch adds an extension to the upstream's list of known extension for Bedrock. Extensions allow for custom logic for adding service-specific information to spans, such as attributes. - Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys, - GEN_AI_REQUEST_MODEL and GEN_AI_SYSTEM attributes referenced in _aws_span_processing_util. + Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys. + Note: Bedrock Runtime uses the upstream extension directly. """ _KNOWN_EXTENSIONS["bedrock"] = _lazy_load(".", "_BedrockExtension") _KNOWN_EXTENSIONS["bedrock-agent"] = _lazy_load(".", "_BedrockAgentExtension") _KNOWN_EXTENSIONS["bedrock-agent-runtime"] = _lazy_load(".", "_BedrockAgentRuntimeExtension") - _KNOWN_EXTENSIONS["bedrock-runtime"] = _lazy_load(".", "_BedrockRuntimeExtension") + # bedrock-runtime is handled by upstream # The OpenTelemetry Authors code @@ -243,7 +247,7 @@ def extract_attributes(self, attributes: _AttributeMapT): attributes[AWS_SECRETSMANAGER_SECRET_ARN] = secret_id # pylint: disable=no-self-use - def on_success(self, span: Span, result: _BotoResultT): + def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): secret_arn = result.get("ARN") if secret_arn: span.set_attribute(AWS_SECRETSMANAGER_SECRET_ARN, secret_arn) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index e0c62b89d..85a6c8958 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -1,5 +1,6 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
 # SPDX-License-Identifier: Apache-2.0
+from importlib.metadata import PackageNotFoundError
 from unittest import TestCase
 from unittest.mock import patch
 
@@ -19,11 +20,12 @@
 
 
 class TestAwsAuthSession(TestCase):
-    @patch("pkg_resources.get_distribution", side_effect=ImportError("test error"))
-    @patch.dict("sys.modules", {"botocore": None}, clear=False)
+    @patch("amazon.opentelemetry.distro._utils.version")
+    @patch.dict("sys.modules", {"botocore": None})
     @patch("requests.Session.request", return_value=requests.Response())
-    def test_aws_auth_session_no_botocore(self, _, __):
+    def test_aws_auth_session_no_botocore(self, mock_request, mock_version):
         """Tests that aws_auth_session will not inject SigV4 Headers if botocore is not installed."""
+        mock_version.side_effect = PackageNotFoundError("botocore")
         session = AwsAuthSession("us-east-1", "xray")
 
         actual_headers = {"test": "test"}
@@ -61,3 +63,18 @@ def test_aws_auth_session(self, _, __):
         self.assertIn(AUTHORIZATION_HEADER, actual_headers)
         self.assertIn(X_AMZ_DATE_HEADER, actual_headers)
         self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers)
+
+    @patch("requests.Session.request", return_value=requests.Response())
+    @patch("botocore.session.Session.get_credentials", return_value=mock_credentials)
+    @patch("botocore.auth.SigV4Auth.add_auth", side_effect=Exception("Signing failed"))
+    def test_aws_auth_session_signing_error(self, mock_add_auth, mock_get_credentials, mock_request):
+        """Tests that aws_auth_session does not add any SigV4 headers if signing fails."""
+
+        session = AwsAuthSession("us-east-1", "xray")
+        actual_headers = {"test": "test"}
+
+        session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers)
+
+        self.assertNotIn(AUTHORIZATION_HEADER, actual_headers)
+        self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers)
+        self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers)
diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
similarity index 54%
rename from aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py
rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
index 1abf680f1..346b44291 100644
--- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor_test.py
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
@@ -22,55 +22,75 @@ def setUp(self):
         self.mock_exporter.export.return_value = LogExportResult.SUCCESS
 
         self.processor = AwsBatchLogRecordProcessor(exporter=self.mock_exporter)
+        self.max_log_size = self.processor._MAX_LOG_REQUEST_BYTE_SIZE
+        self.base_log_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE
 
     def test_process_log_data_nested_structure(self):
         """Tests that the processor correctly handles nested structures (dict/list)"""
         message_size = 400
-        depth = 2
+        message = "X" * message_size
 
-        nested_dict_log_body = self.generate_nested_log_body(
-            depth=depth, expected_body="X" * message_size, create_map=True
+        nest_dict_log = self.generate_test_log_data(
+            log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=True
         )
-        nested_array_log_body = self.generate_nested_log_body(
depth=depth, expected_body="X" * message_size, create_map=False + nest_array_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=False ) - dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=depth) - array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=depth) + expected_size = self.base_log_size + message_size * 2 - # Asserting almost equal to account for key lengths in the Log object body - self.assertAlmostEqual(dict_size, message_size, delta=20) - self.assertAlmostEqual(array_size, message_size, delta=20) + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=2) + array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=2) + + # Asserting almost equal to account for dictionary keys in the Log object + self.assertAlmostEqual(dict_size, expected_size, delta=10) + self.assertAlmostEqual(array_size, expected_size, delta=10) def test_process_log_data_nested_structure_exceeds_depth(self): - """Tests that the processor returns 0 for nested structure that exceeds the depth limit""" - message_size = 400 - log_body = "X" * message_size + """Tests that the processor cuts off calculation for nested structure that exceeds the depth limit""" + calculated = "X" * 400 + message = {"calculated": calculated, "truncated": {"truncated": {"test": "X" * self.max_log_size}}} - nested_dict_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=True) - nested_array_log_body = self.generate_nested_log_body(depth=4, expected_body=log_body, create_map=False) + # *2 since we set this message in both body and attributes + expected_size = self.base_log_size + (len("calculated") + len(calculated) + len("truncated")) * 2 + + nest_dict_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=True + ) + nest_array_log = self.generate_test_log_data( + log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=False + ) - dict_size = self.processor._get_any_value_size(val=nested_dict_log_body, depth=3) - array_size = self.processor._get_any_value_size(val=nested_array_log_body, depth=3) + # Only calculates log size of up to depth of 4 + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=4) + array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=4) - self.assertEqual(dict_size, 0) - self.assertEqual(array_size, 0) + # Asserting almost equal to account for dictionary keys in the Log object body + self.assertAlmostEqual(dict_size, expected_size, delta=10) + self.assertAlmostEqual(array_size, expected_size, delta=10) def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): """Tests that the processor returns prematurely if the size already exceeds _MAX_LOG_REQUEST_BYTE_SIZE""" - log_body = { - "smallKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE // 2), - "bigKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1), + # Should stop calculation at bigKey + message = { + "bigKey": "X" * (self.max_log_size), + "smallKey": "X" * (self.max_log_size * 10), } - nested_dict_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=True) - nested_array_log_body = self.generate_nested_log_body(depth=0, expected_body=log_body, create_map=False) + expected_size = self.base_log_size + self.max_log_size + 
len("bigKey") + + nest_dict_log = self.generate_test_log_data( + log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=True + ) + nest_array_log = self.generate_test_log_data( + log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=False + ) - dict_size = self.processor._get_any_value_size(val=nested_dict_log_body) - array_size = self.processor._get_any_value_size(val=nested_array_log_body) + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0]) + array_size = self.processor._estimate_log_size(log=nest_array_log[0]) - self.assertAlmostEqual(dict_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) - self.assertAlmostEqual(array_size, self.processor._MAX_LOG_REQUEST_BYTE_SIZE, delta=20) + self.assertAlmostEqual(dict_size, expected_size, delta=10) + self.assertAlmostEqual(array_size, expected_size, delta=10) def test_process_log_data_primitive(self): @@ -78,10 +98,18 @@ def test_process_log_data_primitive(self): expected_sizes = [4, 4, 1, 3, 4, 5, 0] for i in range(len(primitives)): - body = primitives[i] - expected_size = expected_sizes[i] + log = self.generate_test_log_data( + log_body=primitives[i], + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=1, + ) + + expected_size = self.base_log_size + expected_sizes[i] + actual_size = self.processor._estimate_log_size(log[0]) - actual_size = self.processor._get_any_value_size(body) self.assertEqual(actual_size, expected_size) @patch( @@ -94,11 +122,13 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): """Tests that export is only called once if a single batch is under the size limit""" log_count = 10 log_body = "test" - test_logs = self.generate_test_log_data(count=log_count, log_body=log_body) + test_logs = self.generate_test_log_data( + log_body=log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=log_count + ) total_data_size = 0 for log in test_logs: - size = self.processor._get_any_value_size(log.log_record.body) + size = self.processor._estimate_log_size(log) total_data_size += size self.processor._queue.appendleft(log) @@ -123,7 +153,9 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): But should only call set_gen_ai_log_flag if it's a Gen AI log event.""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) - non_gen_ai_test_logs = self.generate_test_log_data(count=3, log_body=large_log_body) + non_gen_ai_test_logs = self.generate_test_log_data( + log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=3 + ) gen_ai_test_logs = [] gen_ai_scopes = [ @@ -137,7 +169,13 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): for gen_ai_scope in gen_ai_scopes: gen_ai_test_logs.extend( self.generate_test_log_data( - count=1, log_body=large_log_body, instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0") + log_body=large_log_body, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=3, + instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0"), ) ) @@ -165,18 +203,35 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): """Should make calls to export smaller sub-batch logs""" - large_log_body = "X" * 
(self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) + large_log_body = "X" * (self.max_log_size + 1) + small_log_body = "X" * (self.max_log_size // 10 - self.base_log_size) + gen_ai_scope = InstrumentationScope("openinference.instrumentation.langchain", "1.0.0") - small_log_body = "X" * ( - int(self.processor._MAX_LOG_REQUEST_BYTE_SIZE / 10) - self.processor._BASE_LOG_BUFFER_BYTE_SIZE + + large_logs = self.generate_test_log_data( + log_body=large_log_body, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=3, + instrumentation_scope=gen_ai_scope, + ) + + small_logs = self.generate_test_log_data( + log_body=small_log_body, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=12, + instrumentation_scope=gen_ai_scope, ) - test_logs = self.generate_test_log_data(count=3, log_body=large_log_body, instrumentation_scope=gen_ai_scope) + # 1st, 2nd, 3rd batch = size 1 # 4th batch = size 10 # 5th batch = size 2 - small_logs = self.generate_test_log_data(count=12, log_body=small_log_body, instrumentation_scope=gen_ai_scope) - - test_logs.extend(small_logs) + test_logs = large_logs + small_logs for log in test_logs: self.processor._queue.appendleft(log) @@ -202,10 +257,29 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): expected_size = expected_sizes[i] self.assertEqual(len(batch), expected_size) + @staticmethod def generate_test_log_data( - self, log_body: AnyValue, count=5, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0") + log_body, + attr_key, + attr_val, + log_body_depth=3, + attr_depth=3, + count=5, + create_map=True, + instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"), ) -> List[LogData]: + + def generate_nested_value(depth, value, create_map=True) -> AnyValue: + if depth < 0: + return value + + if create_map: + return {"t": generate_nested_value(depth - 1, value, True)} + + return [generate_nested_value(depth - 1, value, False)] + logs = [] + for i in range(count): record = LogRecord( timestamp=int(time.time_ns()), @@ -214,23 +288,11 @@ def generate_test_log_data( trace_flags=TraceFlags(1), severity_text="INFO", severity_number=SeverityNumber.INFO, - body=log_body, - attributes={"test.attribute": f"value-{i + 1}"}, + body=generate_nested_value(log_body_depth, log_body, create_map), + attributes={attr_key: generate_nested_value(attr_depth, attr_val, create_map)}, ) log_data = LogData(log_record=record, instrumentation_scope=instrumentation_scope) logs.append(log_data) return logs - - @staticmethod - def generate_nested_log_body(depth=0, expected_body: AnyValue = "test", create_map=True): - if depth < 0: - return expected_body - - if create_map: - return { - "key": TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map) - } - - return [TestAwsBatchLogRecordProcessor.generate_nested_log_body(depth - 1, expected_body, create_map)] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py similarity index 88% rename from aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 9f6d84b32..82491bc01 100644 --- 
a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter_test.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -8,9 +8,7 @@ from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from opentelemetry._logs.severity import SeverityNumber from opentelemetry.sdk._logs import LogData, LogRecord -from opentelemetry.sdk._logs.export import ( - LogExportResult, -) +from opentelemetry.sdk._logs.export import LogExportResult from opentelemetry.sdk.util.instrumentation import InstrumentationScope from opentelemetry.trace import TraceFlags @@ -38,7 +36,7 @@ def setUp(self): self.logs = self.generate_test_log_data() self.exporter = OTLPAwsLogExporter(endpoint=self._ENDPOINT) - @patch("requests.Session.request", return_value=good_response) + @patch("requests.Session.post", return_value=good_response) def test_export_success(self, mock_request): """Tests that the exporter always compresses the serialized logs with gzip before exporting.""" result = self.exporter.export(self.logs) @@ -56,7 +54,7 @@ def test_export_success(self, mock_request): self.assertTrue(len(data) >= 10) self.assertEqual(data[0:2], b"\x1f\x8b") - @patch("requests.Session.request", return_value=good_response) + @patch("requests.Session.post", return_value=good_response) def test_export_gen_ai_logs(self, mock_request): """Tests that when set_gen_ai_log_flag is set, the exporter includes the LLO header in the request.""" @@ -74,7 +72,7 @@ def test_export_gen_ai_logs(self, mock_request): self.assertIn(self.exporter._LARGE_LOG_HEADER, headers) self.assertEqual(headers[self.exporter._LARGE_LOG_HEADER], self.exporter._LARGE_GEN_AI_LOG_PATH_HEADER) - @patch("requests.Session.request", return_value=good_response) + @patch("requests.Session.post", return_value=good_response) def test_should_not_export_if_shutdown(self, mock_request): """Tests that no export request is made if the exporter is shutdown.""" self.exporter.shutdown() @@ -83,7 +81,7 @@ def test_should_not_export_if_shutdown(self, mock_request): mock_request.assert_not_called() self.assertEqual(result, LogExportResult.FAILURE) - @patch("requests.Session.request", return_value=non_retryable_response) + @patch("requests.Session.post", return_value=non_retryable_response) def test_should_not_export_again_if_not_retryable(self, mock_request): """Tests that only one export request is made if the response status code is non-retryable.""" result = self.exporter.export(self.logs) @@ -94,7 +92,7 @@ def test_should_not_export_again_if_not_retryable(self, mock_request): @patch( "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None ) - @patch("requests.Session.request", return_value=retryable_response_no_header) + @patch("requests.Session.post", return_value=retryable_response_no_header) def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_sleep): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. 
But there is no Retry-After header.""" @@ -116,7 +114,7 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None ) @patch( - "requests.Session.request", + "requests.Session.post", side_effect=[retryable_response_header, retryable_response_header, retryable_response_header, good_response], ) def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_sleep): @@ -136,7 +134,7 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None ) @patch( - "requests.Session.request", + "requests.Session.post", side_effect=[ retryable_response_bad_header, retryable_response_bad_header, @@ -159,6 +157,14 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after self.assertEqual(mock_request.call_count, 4) self.assertEqual(result, LogExportResult.SUCCESS) + @patch("requests.Session.post", side_effect=[requests.exceptions.ConnectionError(), good_response]) + def test_export_connection_error_retry(self, mock_request): + """Tests that the exporter retries on ConnectionError.""" + result = self.exporter.export(self.logs) + + self.assertEqual(mock_request.call_count, 2) + self.assertEqual(result, LogExportResult.SUCCESS) + def generate_test_log_data(self, count=5): logs = [] for i in range(count): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py new file mode 100644 index 000000000..3ea6031c3 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py @@ -0,0 +1,547 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import json +import time +import unittest +from unittest.mock import Mock, patch + +from botocore.exceptions import ClientError + +from amazon.opentelemetry.distro.exporter.otlp.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter +from opentelemetry.sdk.metrics.export import Gauge, MetricExportResult +from opentelemetry.sdk.resources import Resource + + +class MockDataPoint: + """Mock datapoint for testing.""" + + def __init__(self, value=10.0, attributes=None, time_unix_nano=None): + self.value = value + self.attributes = attributes or {} + self.time_unix_nano = time_unix_nano or int(time.time() * 1_000_000_000) + + +class MockMetric: + """Mock metric for testing.""" + + def __init__(self, name="test_metric", unit="1", description="Test metric"): + self.name = name + self.unit = unit + self.description = description + + +class MockGaugeData: + """Mock gauge data that passes isinstance checks.""" + + def __init__(self, data_points=None): + self.data_points = data_points or [] + + +class MockMetricWithData: + """Mock metric with data attribute.""" + + def __init__(self, name="test_metric", unit="1", description="Test metric", data=None): + self.name = name + self.unit = unit + self.description = description + self.data = data or MockGaugeData() + + +class MockResourceMetrics: + """Mock resource metrics for testing.""" + + def __init__(self, resource=None, scope_metrics=None): + self.resource = resource or Resource.create({"service.name": "test-service"}) + self.scope_metrics = scope_metrics or [] + + +class MockScopeMetrics: + """Mock scope metrics for testing.""" + + def __init__(self, scope=None, metrics=None): + self.scope = scope or Mock() + self.metrics = metrics or [] + + +# pylint: disable=too-many-public-methods +class TestAwsCloudWatchEmfExporter(unittest.TestCase): + """Test AwsCloudWatchEmfExporter class.""" + + def setUp(self): + """Set up test fixtures.""" + # Mock the botocore session to avoid AWS calls + with patch("botocore.session.Session") as mock_session: + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.create_log_group.return_value = {} + mock_client.create_log_stream.return_value = {} + + self.exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + + def test_initialization(self): + """Test exporter initialization.""" + self.assertEqual(self.exporter.namespace, "TestNamespace") + self.assertIsNotNone(self.exporter.log_stream_name) + self.assertEqual(self.exporter.log_group_name, "test-log-group") + + @patch("botocore.session.Session") + def test_initialization_with_custom_params(self, mock_session): + """Test exporter initialization with custom parameters.""" + # Mock the botocore session to avoid AWS calls + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.create_log_group.return_value = {} + mock_client.create_log_stream.return_value = {} + + exporter = AwsCloudWatchEmfExporter( + namespace="CustomNamespace", + log_group_name="custom-log-group", + log_stream_name="custom-stream", + aws_region="us-west-2", + ) + self.assertEqual(exporter.namespace, "CustomNamespace") + self.assertEqual(exporter.log_group_name, "custom-log-group") + self.assertEqual(exporter.log_stream_name, "custom-stream") + + def 
test_get_unit_mapping(self): + """Test unit mapping functionality.""" + # Test known units from UNIT_MAPPING + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "ms", "test")), "Milliseconds" + ) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "s", "test")), "Seconds") + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "us", "test")), "Microseconds" + ) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "By", "test")), "Bytes") + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "bit", "test")), "Bits") + + # Test units that map to empty string (should return empty string from mapping) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "1", "test")), "") + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "ns", "test")), "") + + # Test EMF supported units directly (should return as-is) + self.assertEqual(self.exporter._get_unit(self.exporter._create_metric_record("test", "Count", "test")), "Count") + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "Percent", "test")), "Percent" + ) + self.assertEqual( + self.exporter._get_unit(self.exporter._create_metric_record("test", "Kilobytes", "test")), "Kilobytes" + ) + + # Test unknown unit (not in mapping and not in supported units, returns None) + self.assertIsNone(self.exporter._get_unit(self.exporter._create_metric_record("test", "unknown", "test"))) + + # Test empty unit (should return None due to falsy check) + self.assertIsNone(self.exporter._get_unit(self.exporter._create_metric_record("test", "", "test"))) + + # Test None unit + self.assertIsNone(self.exporter._get_unit(self.exporter._create_metric_record("test", None, "test"))) + + def test_get_metric_name(self): + """Test metric name extraction.""" + # Test with record that has name attribute + record = Mock() + record.name = "test_metric" + + result = self.exporter._get_metric_name(record) + self.assertEqual(result, "test_metric") + + # Test with record that has empty name (should return None) + record_empty = Mock() + record_empty.name = "" + + result_empty = self.exporter._get_metric_name(record_empty) + self.assertIsNone(result_empty) + + def test_get_dimension_names(self): + """Test dimension names extraction.""" + attributes = {"service.name": "test-service", "env": "prod", "region": "us-east-1"} + + result = self.exporter._get_dimension_names(attributes) + + # Should return all attribute keys + self.assertEqual(set(result), {"service.name", "env", "region"}) + + def test_get_attributes_key(self): + """Test attributes key generation.""" + attributes = {"service": "test", "env": "prod"} + + result = self.exporter._get_attributes_key(attributes) + + # Should be a string representation of sorted attributes + self.assertIsInstance(result, str) + self.assertIn("service", result) + self.assertIn("test", result) + self.assertIn("env", result) + self.assertIn("prod", result) + + def test_get_attributes_key_consistent(self): + """Test that attributes key generation is consistent.""" + # Same attributes in different order should produce same key + attrs1 = {"b": "2", "a": "1"} + attrs2 = {"a": "1", "b": "2"} + + key1 = self.exporter._get_attributes_key(attrs1) + key2 = self.exporter._get_attributes_key(attrs2) + + self.assertEqual(key1, key2) + + def test_group_by_attributes_and_timestamp(self): + 
"""Test grouping by attributes and timestamp.""" + record = Mock() + record.attributes = {"env": "test"} + record.timestamp = 1234567890 + + result = self.exporter._group_by_attributes_and_timestamp(record) + + # Should return a tuple with attributes key and timestamp + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 2) + self.assertEqual(result[1], 1234567890) + + def test_generate_log_stream_name(self): + """Test log stream name generation.""" + name1 = self.exporter._generate_log_stream_name() + name2 = self.exporter._generate_log_stream_name() + + # Should generate unique names + self.assertNotEqual(name1, name2) + self.assertTrue(name1.startswith("otel-python-")) + self.assertTrue(name2.startswith("otel-python-")) + + def test_normalize_timestamp(self): + """Test timestamp normalization.""" + timestamp_ns = 1609459200000000000 # 2021-01-01 00:00:00 in nanoseconds + expected_ms = 1609459200000 # Same time in milliseconds + + result = self.exporter._normalize_timestamp(timestamp_ns) + self.assertEqual(result, expected_ms) + + def test_create_metric_record(self): + """Test metric record creation.""" + record = self.exporter._create_metric_record("test_metric", "Count", "Test description") + + self.assertIsNotNone(record) + self.assertEqual(record.name, "test_metric") + self.assertEqual(record.unit, "Count") + self.assertEqual(record.description, "Test description") + + def test_convert_gauge(self): + """Test gauge conversion.""" + metric = MockMetric("gauge_metric", "Count", "Gauge description") + dp = MockDataPoint(value=42.5, attributes={"key": "value"}) + + record = self.exporter._convert_gauge(metric, dp) + + self.assertIsNotNone(record) + self.assertEqual(record.name, "gauge_metric") + self.assertEqual(record.value, 42.5) + self.assertEqual(record.attributes, {"key": "value"}) + self.assertIsInstance(record.timestamp, int) + + def test_create_emf_log(self): + """Test EMF log creation.""" + # Create test records + gauge_record = self.exporter._create_metric_record("gauge_metric", "Count", "Gauge") + gauge_record.value = 50.0 + gauge_record.timestamp = int(time.time() * 1000) + gauge_record.attributes = {"env": "test"} + + records = [gauge_record] + resource = Resource.create({"service.name": "test-service"}) + + result = self.exporter._create_emf_log(records, resource) + + self.assertIsInstance(result, dict) + + # Check that the result is JSON serializable + json.dumps(result) # Should not raise exception + + @patch("botocore.session.Session") + def test_export_success(self, mock_session): + """Test successful export.""" + # Mock CloudWatch Logs client + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.put_log_events.return_value = {"nextSequenceToken": "12345"} + + # Create empty metrics data to test basic export flow + metrics_data = Mock() + metrics_data.resource_metrics = [] + + result = self.exporter.export(metrics_data) + + self.assertEqual(result, MetricExportResult.SUCCESS) + + def test_export_failure(self): + """Test export failure handling.""" + # Create metrics data that will cause an exception during iteration + metrics_data = Mock() + # Make resource_metrics raise an exception when iterated over + metrics_data.resource_metrics = Mock() + metrics_data.resource_metrics.__iter__ = Mock(side_effect=Exception("Test exception")) + + result = self.exporter.export(metrics_data) + + self.assertEqual(result, 
MetricExportResult.FAILURE) + + def test_force_flush_no_pending_events(self): + """Test force flush functionality with no pending events.""" + result = self.exporter.force_flush() + + self.assertTrue(result) + + @patch.object(AwsCloudWatchEmfExporter, "force_flush") + def test_shutdown(self, mock_force_flush): + """Test shutdown functionality.""" + mock_force_flush.return_value = True + + result = self.exporter.shutdown(timeout_millis=5000) + + self.assertTrue(result) + mock_force_flush.assert_called_once_with(5000) + + def test_send_log_event_method_exists(self): + """Test that _send_log_event method exists and can be called.""" + # Just test that the method exists and doesn't crash with basic input + log_event = {"message": "test message", "timestamp": 1234567890} + + # Mock the AWS client methods to avoid actual AWS calls + with patch.object(self.exporter.logs_client, "create_log_group"): + with patch.object(self.exporter.logs_client, "create_log_stream"): + with patch.object(self.exporter.logs_client, "put_log_events") as mock_put: + mock_put.return_value = {"nextSequenceToken": "12345"} + + # Should not raise an exception + try: + response = self.exporter._send_log_event(log_event) + # Response may be None or a dict, both are acceptable + self.assertTrue(response is None or isinstance(response, dict)) + except ClientError as error: + self.fail(f"_send_log_event raised an exception: {error}") + + def test_create_emf_log_with_resource(self): + """Test EMF log creation with resource attributes.""" + # Create test records + gauge_record = self.exporter._create_metric_record("gauge_metric", "Count", "Gauge") + gauge_record.value = 50.0 + gauge_record.timestamp = int(time.time() * 1000) + gauge_record.attributes = {"env": "test", "service": "api"} + + records = [gauge_record] + resource = Resource.create({"service.name": "test-service", "service.version": "1.0.0"}) + + result = self.exporter._create_emf_log(records, resource, 1234567890) + + # Verify EMF log structure + self.assertIn("_aws", result) + self.assertIn("CloudWatchMetrics", result["_aws"]) + self.assertEqual(result["_aws"]["Timestamp"], 1234567890) + self.assertEqual(result["Version"], "1") + + # Check resource attributes are prefixed + self.assertEqual(result["otel.resource.service.name"], "test-service") + self.assertEqual(result["otel.resource.service.version"], "1.0.0") + + # Check metric attributes + self.assertEqual(result["env"], "test") + self.assertEqual(result["service"], "api") + + # Check metric value + self.assertEqual(result["gauge_metric"], 50.0) + + # Check CloudWatch metrics structure + cw_metrics = result["_aws"]["CloudWatchMetrics"][0] + self.assertEqual(cw_metrics["Namespace"], "TestNamespace") + self.assertEqual(set(cw_metrics["Dimensions"][0]), {"env", "service"}) + self.assertEqual(cw_metrics["Metrics"][0]["Name"], "gauge_metric") + + @patch("botocore.session.Session") + def test_export_with_gauge_metrics(self, mock_session): + """Test exporting actual gauge metrics.""" + # Mock CloudWatch Logs client + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.put_log_events.return_value = {"nextSequenceToken": "12345"} + mock_client.create_log_group.side_effect = ClientError( + {"Error": {"Code": "ResourceAlreadyExistsException"}}, "CreateLogGroup" + ) + mock_client.create_log_stream.side_effect = ClientError( + {"Error": {"Code": "ResourceAlreadyExistsException"}}, 
"CreateLogStream" + ) + + # Create mock metrics data + resource = Resource.create({"service.name": "test-service"}) + + # Create gauge data + gauge_data = Gauge(data_points=[MockDataPoint(value=42.0, attributes={"key": "value"})]) + + metric = MockMetricWithData(name="test_gauge", data=gauge_data) + + scope_metrics = MockScopeMetrics(metrics=[metric]) + resource_metrics = MockResourceMetrics(resource=resource, scope_metrics=[scope_metrics]) + + metrics_data = Mock() + metrics_data.resource_metrics = [resource_metrics] + + result = self.exporter.export(metrics_data) + + self.assertEqual(result, MetricExportResult.SUCCESS) + # Test validates that export works with gauge metrics + + def test_get_metric_name_fallback(self): + """Test metric name extraction fallback.""" + # Test with record that has no instrument attribute + record = Mock(spec=[]) + + result = self.exporter._get_metric_name(record) + self.assertIsNone(result) + + def test_get_metric_name_empty_name(self): + """Test metric name extraction with empty name.""" + # Test with record that has empty name + record = Mock() + record.name = "" + + result = self.exporter._get_metric_name(record) + self.assertIsNone(result) + + def test_create_emf_log_skips_empty_metric_names(self): + """Test that EMF log creation skips records with empty metric names.""" + # Create a record with no metric name + record_without_name = Mock() + record_without_name.attributes = {"key": "value"} + record_without_name.value = 10.0 + record_without_name.name = None # No valid name + + # Create a record with valid metric name + valid_record = self.exporter._create_metric_record("valid_metric", "Count", "Valid metric") + valid_record.value = 20.0 + valid_record.attributes = {"key": "value"} + + records = [record_without_name, valid_record] + resource = Resource.create({"service.name": "test-service"}) + + result = self.exporter._create_emf_log(records, resource, 1234567890) + + # Only the valid record should be processed + self.assertIn("valid_metric", result) + self.assertEqual(result["valid_metric"], 20.0) + + # Check that only the valid metric is in the definitions (empty names are skipped) + cw_metrics = result["_aws"]["CloudWatchMetrics"][0] + self.assertEqual(len(cw_metrics["Metrics"]), 1) + # Ensure our valid metric is present + metric_names = [m["Name"] for m in cw_metrics["Metrics"]] + self.assertIn("valid_metric", metric_names) + + @patch("os.environ.get") + @patch("botocore.session.Session") + def test_initialization_with_env_region(self, mock_session, mock_env_get): + """Test initialization with AWS region from environment.""" + # Mock environment variable + mock_env_get.side_effect = lambda key: "us-west-1" if key == "AWS_REGION" else None + + # Mock the botocore session to avoid AWS calls + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + mock_client.create_log_group.return_value = {} + mock_client.create_log_stream.return_value = {} + + exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + + # Just verify the exporter was created successfully with region handling + self.assertIsNotNone(exporter) + self.assertEqual(exporter.namespace, "TestNamespace") + + @patch("botocore.session.Session") + def test_ensure_log_group_exists_create_failure(self, mock_session): + """Test log group creation failure.""" + # Mock the botocore session + mock_client = Mock() + mock_session_instance = Mock() 
+ mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + + # Make create fail with access denied error + mock_client.create_log_group.side_effect = ClientError({"Error": {"Code": "AccessDenied"}}, "CreateLogGroup") + mock_client.create_log_stream.return_value = {} + + with self.assertRaises(ClientError): + AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + + @patch("botocore.session.Session") + def test_ensure_log_group_exists_success(self, mock_session): + """Test log group existence check when log group already exists.""" + # Mock the botocore session + mock_client = Mock() + mock_session_instance = Mock() + mock_session.return_value = mock_session_instance + mock_session_instance.create_client.return_value = mock_client + + # Make create fail with ResourceAlreadyExistsException (log group exists) + mock_client.create_log_group.side_effect = ClientError( + {"Error": {"Code": "ResourceAlreadyExistsException"}}, "CreateLogGroup" + ) + mock_client.create_log_stream.return_value = {} + + # This should not raise an exception + exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + self.assertIsNotNone(exporter) + # Verify create was called once + mock_client.create_log_group.assert_called_once_with(logGroupName="test-log-group") + + def test_export_with_unsupported_metric_type(self): + """Test export with unsupported metric types.""" + # Create mock metrics data with unsupported metric type + resource = Resource.create({"service.name": "test-service"}) + + # Create non-gauge data + unsupported_data = Mock() + unsupported_data.data_points = [MockDataPoint(value=42.0)] + + metric = MockMetricWithData(name="test_counter", data=unsupported_data) + + scope_metrics = MockScopeMetrics(metrics=[metric]) + resource_metrics = MockResourceMetrics(resource=resource, scope_metrics=[scope_metrics]) + + metrics_data = Mock() + metrics_data.resource_metrics = [resource_metrics] + + # Should still return success even with unsupported metrics + result = self.exporter.export(metrics_data) + self.assertEqual(result, MetricExportResult.SUCCESS) + + def test_export_with_metric_without_data(self): + """Test export with metrics that don't have data attribute.""" + # Create mock metrics data + resource = Resource.create({"service.name": "test-service"}) + + # Create metric without data attribute + metric = Mock(spec=[]) + + scope_metrics = MockScopeMetrics(metrics=[metric]) + resource_metrics = MockResourceMetrics(resource=resource, scope_metrics=[scope_metrics]) + + metrics_data = Mock() + metrics_data.resource_metrics = [resource_metrics] + + # Should still return success + result = self.exporter.export(metrics_data) + self.assertEqual(result, MetricExportResult.SUCCESS) + + +if __name__ == "__main__": + unittest.main() diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_otlp_aws_span_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py similarity index 100% rename from aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_otlp_aws_span_exporter.py rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py deleted file mode 
100644 index e0c62b89d..000000000 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_auth_session.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 -from unittest import TestCase -from unittest.mock import patch - -import requests -from botocore.credentials import Credentials - -from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession - -AWS_OTLP_TRACES_ENDPOINT = "https://xray.us-east-1.amazonaws.com/v1/traces" -AWS_OTLP_LOGS_ENDPOINT = "https://logs.us-east-1.amazonaws.com/v1/logs" - -AUTHORIZATION_HEADER = "Authorization" -X_AMZ_DATE_HEADER = "X-Amz-Date" -X_AMZ_SECURITY_TOKEN_HEADER = "X-Amz-Security-Token" - -mock_credentials = Credentials(access_key="test_access_key", secret_key="test_secret_key", token="test_session_token") - - -class TestAwsAuthSession(TestCase): - @patch("pkg_resources.get_distribution", side_effect=ImportError("test error")) - @patch.dict("sys.modules", {"botocore": None}, clear=False) - @patch("requests.Session.request", return_value=requests.Response()) - def test_aws_auth_session_no_botocore(self, _, __): - """Tests that aws_auth_session will not inject SigV4 Headers if botocore is not installed.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) - self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) - - @patch("requests.Session.request", return_value=requests.Response()) - @patch("botocore.session.Session.get_credentials", return_value=None) - def test_aws_auth_session_no_credentials(self, _, __): - """Tests that aws_auth_session will not inject SigV4 Headers if retrieving credentials returns None.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) - self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) - - @patch("requests.Session.request", return_value=requests.Response()) - @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) - def test_aws_auth_session(self, _, __): - """Tests that aws_auth_session will inject SigV4 Headers if botocore is installed.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertIn(AUTHORIZATION_HEADER, actual_headers) - self.assertIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py index d122519cf..f99b0d154 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py @@ -37,12 +37,12 @@ AWS_STEPFUNCTIONS_STATEMACHINE_ARN, ) from amazon.opentelemetry.distro._aws_metric_attribute_generator import _AwsMetricAttributeGenerator 
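+# GEN_AI_REQUEST_MODEL now comes from OpenTelemetry's incubating gen_ai
+# semantic-convention attributes (imported below) instead of the distro's
+# span-processing utils; only the import location changes in this hunk.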
-from amazon.opentelemetry.distro._aws_span_processing_util import GEN_AI_REQUEST_MODEL from amazon.opentelemetry.distro.metric_attribute_generator import DEPENDENCY_METRIC, SERVICE_METRIC from opentelemetry.attributes import BoundedAttributes from opentelemetry.sdk.resources import _DEFAULT_RESOURCE, SERVICE_NAME from opentelemetry.sdk.trace import ReadableSpan, Resource from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_REQUEST_MODEL from opentelemetry.semconv.trace import MessagingOperationValues, SpanAttributes from opentelemetry.trace import SpanContext, SpanKind from opentelemetry.util.types import Attributes diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 13397a0d5..dbaee3c33 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -50,6 +50,7 @@ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.metrics import get_meter_provider from opentelemetry.processor.baggage import BaggageSpanProcessor from opentelemetry.sdk.environment_variables import OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG from opentelemetry.sdk.metrics._internal.export import PeriodicExportingMetricReader @@ -87,6 +88,22 @@ def setUpClass(cls): aws_otel_configurator.configure() cls.tracer_provider: TracerProvider = get_tracer_provider() + @classmethod + def tearDownClass(cls): + # Explicitly shut down meter provider to avoid I/O errors on Python 3.9 with gevent + # This ensures ConsoleMetricExporter is properly closed before Python cleanup + try: + meter_provider = get_meter_provider() + if hasattr(meter_provider, "force_flush"): + meter_provider.force_flush() + if hasattr(meter_provider, "shutdown"): + meter_provider.shutdown() + except (ValueError, RuntimeError): + # Ignore errors during cleanup: + # - ValueError: I/O operation on closed file (the exact error we're trying to prevent) + # - RuntimeError: Provider already shut down or threading issues + pass + def tearDown(self): os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_ENABLED", None) os.environ.pop("OTEL_AWS_APPLICATION_SIGNALS_RUNTIME_ENABLED", None) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py index b77e4fbf8..7368a04c8 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelemetry_distro.py @@ -1,13 +1,12 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 +from importlib.metadata import PackageNotFoundError, version from unittest import TestCase -from pkg_resources import DistributionNotFound, require - class TestAwsOpenTelemetryDistro(TestCase): def test_package_available(self): try: - require(["aws-opentelemetry-distro"]) - except DistributionNotFound: + version("aws-opentelemetry-distro") + except PackageNotFoundError: self.fail("aws-opentelemetry-distro not installed") diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py index 87e6c4810..8eff6f2e6 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py @@ -1,17 +1,15 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -import json -import math import os -from io import BytesIO +from importlib.metadata import PackageNotFoundError from typing import Any, Dict from unittest import TestCase from unittest.mock import MagicMock, patch import gevent.monkey -import pkg_resources -from botocore.response import StreamingBody +import opentelemetry.sdk.extension.aws.resource.ec2 as ec2_resource +import opentelemetry.sdk.extension.aws.resource.eks as eks_resource from amazon.opentelemetry.distro.patches._instrumentation_patch import ( AWS_GEVENT_PATCH_MODULES, apply_instrumentation_patches, @@ -38,7 +36,7 @@ _LAMBDA_SOURCE_MAPPING_ID: str = "lambdaEventSourceMappingID" # Patch names -GET_DISTRIBUTION_PATCH: str = "amazon.opentelemetry.distro._utils.pkg_resources.get_distribution" +IMPORTLIB_METADATA_VERSION_PATCH: str = "amazon.opentelemetry.distro._utils.version" class TestInstrumentationPatch(TestCase): @@ -60,7 +58,7 @@ class TestInstrumentationPatch(TestCase): def test_instrumentation_patch(self): # Set up method patches used by all tests - self.method_patches[GET_DISTRIBUTION_PATCH] = patch(GET_DISTRIBUTION_PATCH).start() + self.method_patches[IMPORTLIB_METADATA_VERSION_PATCH] = patch(IMPORTLIB_METADATA_VERSION_PATCH).start() # Run tests that validate patch behaviour before and after patching self._run_patch_behaviour_tests() @@ -73,7 +71,7 @@ def test_instrumentation_patch(self): def _run_patch_behaviour_tests(self): # Test setup - self.method_patches[GET_DISTRIBUTION_PATCH].return_value = "CorrectDistributionObject" + self.method_patches[IMPORTLIB_METADATA_VERSION_PATCH].return_value = "1.0.0" # Test setup to not patch gevent os.environ[AWS_GEVENT_PATCH_MODULES] = "none" @@ -120,6 +118,8 @@ def _run_patch_mechanism_tests(self): """ self._test_botocore_installed_flag() self._reset_mocks() + self._test_resource_detector_patches() + self._reset_mocks() def _test_unpatched_botocore_instrumentation(self): # Kinesis @@ -147,7 +147,7 @@ def _test_unpatched_botocore_instrumentation(self): ) # BedrockRuntime - self.assertFalse("bedrock-runtime" in _KNOWN_EXTENSIONS, "Upstream has added a bedrock-runtime extension") + self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS, "Upstream has added a bedrock-runtime extension") # SecretsManager self.assertFalse("secretsmanager" in _KNOWN_EXTENSIONS, "Upstream has added a SecretsManager extension") @@ -213,95 +213,9 @@ def _test_patched_botocore_instrumentation(self): bedrock_agent_runtime_sucess_attributes: Dict[str, str] = _do_on_success_bedrock("bedrock-agent-runtime") 
self.assertEqual(len(bedrock_agent_runtime_sucess_attributes), 0) - # BedrockRuntime - Amazon Titan + # BedrockRuntime self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS) - self._test_patched_bedrock_runtime_invoke_model( - model_id="amazon.titan-embed-text-v1", - max_tokens=512, - temperature=0.9, - top_p=0.75, - finish_reason="FINISH", - input_tokens=123, - output_tokens=456, - ) - - self._test_patched_bedrock_runtime_invoke_model( - model_id="amazon.nova-pro-v1:0", - max_tokens=500, - temperature=0.9, - top_p=0.7, - finish_reason="FINISH", - input_tokens=123, - output_tokens=456, - ) - - # BedrockRuntime - Anthropic Claude - self._test_patched_bedrock_runtime_invoke_model( - model_id="anthropic.claude-v2:1", - max_tokens=512, - temperature=0.5, - top_p=0.999, - finish_reason="end_turn", - input_tokens=23, - output_tokens=36, - ) - - # BedrockRuntime - Meta LLama - self._test_patched_bedrock_runtime_invoke_model( - model_id="meta.llama2-13b-chat-v1", - max_tokens=512, - temperature=0.5, - top_p=0.9, - finish_reason="stop", - input_tokens=31, - output_tokens=36, - ) - - # BedrockRuntime - Cohere Command-r - cohere_input = "Hello, world" - cohere_output = "Goodbye, world" - - self._test_patched_bedrock_runtime_invoke_model( - model_id="cohere.command-r-v1:0", - max_tokens=512, - temperature=0.5, - top_p=0.75, - finish_reason="COMPLETE", - input_tokens=math.ceil(len(cohere_input) / 6), - output_tokens=math.ceil(len(cohere_output) / 6), - input_prompt=cohere_input, - output_prompt=cohere_output, - ) - - # BedrockRuntime - AI21 Jambda - self._test_patched_bedrock_runtime_invoke_model( - model_id="ai21.jamba-1-5-large-v1:0", - max_tokens=512, - temperature=0.5, - top_p=0.999, - finish_reason="end_turn", - input_tokens=23, - output_tokens=36, - ) - - # BedrockRuntime - Mistral - msg = "Hello World" - mistral_input = f"[INST] {msg} [/INST]" - mistral_output = "Goodbye, World" - - self._test_patched_bedrock_runtime_invoke_model( - model_id="mistral.mistral-7b-instruct-v0:2", - max_tokens=512, - temperature=0.5, - top_p=0.9, - finish_reason="stop", - input_tokens=math.ceil(len(mistral_input) / 6), - output_tokens=math.ceil(len(mistral_output) / 6), - input_prompt=mistral_input, - output_prompt=mistral_output, - ) - # SecretsManager self.assertTrue("secretsmanager" in _KNOWN_EXTENSIONS) secretsmanager_attributes: Dict[str, str] = _do_extract_secretsmanager_attributes() @@ -369,17 +283,13 @@ def _test_botocore_installed_flag(self): with patch( "amazon.opentelemetry.distro.patches._botocore_patches._apply_botocore_instrumentation_patches" ) as mock_apply_patches: - get_distribution_patch: patch = self.method_patches[GET_DISTRIBUTION_PATCH] - get_distribution_patch.side_effect = pkg_resources.DistributionNotFound - apply_instrumentation_patches() - mock_apply_patches.assert_not_called() - - get_distribution_patch.side_effect = pkg_resources.VersionConflict("botocore==1.0.0", "botocore==0.0.1") + get_distribution_patch: patch = self.method_patches[IMPORTLIB_METADATA_VERSION_PATCH] + get_distribution_patch.side_effect = PackageNotFoundError apply_instrumentation_patches() mock_apply_patches.assert_not_called() get_distribution_patch.side_effect = None - get_distribution_patch.return_value = "CorrectDistributionObject" + get_distribution_patch.return_value = "1.0.0" apply_instrumentation_patches() mock_apply_patches.assert_called() @@ -389,146 +299,6 @@ def _test_patched_bedrock_instrumentation(self): self.assertEqual(len(bedrock_sucess_attributes), 1) 
self.assertEqual(bedrock_sucess_attributes["aws.bedrock.guardrail.id"], _BEDROCK_GUARDRAIL_ID) - def _test_patched_bedrock_runtime_invoke_model(self, **args): - model_id = args.get("model_id", None) - max_tokens = args.get("max_tokens", None) - temperature = args.get("temperature", None) - top_p = args.get("top_p", None) - finish_reason = args.get("finish_reason", None) - input_tokens = args.get("input_tokens", None) - output_tokens = args.get("output_tokens", None) - input_prompt = args.get("input_prompt", None) - output_prompt = args.get("output_prompt", None) - - def get_model_response_request(): - request_body = {} - response_body = {} - - if "amazon.titan" in model_id: - request_body = { - "textGenerationConfig": { - "maxTokenCount": max_tokens, - "temperature": temperature, - "topP": top_p, - } - } - - response_body = { - "inputTextTokenCount": input_tokens, - "results": [ - { - "tokenCount": output_tokens, - "outputText": "testing", - "completionReason": finish_reason, - } - ], - } - - if "amazon.nova" in model_id: - request_body = { - "inferenceConfig": { - "max_new_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - } - - response_body = { - "output": {"message": {"content": [{"text": ""}], "role": "assistant"}}, - "stopReason": finish_reason, - "usage": {"inputTokens": input_tokens, "outputTokens": output_tokens}, - } - - if "anthropic.claude" in model_id: - request_body = { - "anthropic_version": "bedrock-2023-05-31", - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = { - "stop_reason": finish_reason, - "stop_sequence": None, - "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, - } - - if "ai21.jamba" in model_id: - request_body = { - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = { - "choices": [{"finish_reason": finish_reason}], - "usage": { - "prompt_tokens": input_tokens, - "completion_tokens": output_tokens, - "total_tokens": (input_tokens + output_tokens), - }, - } - - if "meta.llama" in model_id: - request_body = { - "max_gen_len": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = { - "prompt_token_count": input_tokens, - "generation_token_count": output_tokens, - "stop_reason": finish_reason, - } - - if "cohere.command" in model_id: - request_body = { - "message": input_prompt, - "max_tokens": max_tokens, - "temperature": temperature, - "p": top_p, - } - - response_body = { - "text": output_prompt, - "finish_reason": finish_reason, - } - - if "mistral" in model_id: - request_body = { - "prompt": input_prompt, - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - } - - response_body = {"outputs": [{"text": output_prompt, "stop_reason": finish_reason}]} - - json_bytes = json.dumps(response_body).encode("utf-8") - - return json.dumps(request_body), StreamingBody(BytesIO(json_bytes), len(json_bytes)) - - request_body, response_body = get_model_response_request() - - bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock( - "bedrock-runtime", model_id=model_id, request_body=request_body - ) - bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock( - "bedrock-runtime", model_id=model_id, streaming_body=response_body - ) - - bedrock_runtime_attributes.update(bedrock_runtime_success_attributes) - - self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM) - 
self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], model_id) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], max_tokens) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], temperature) - self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], top_p) - self.assertEqual(bedrock_runtime_attributes["gen_ai.usage.input_tokens"], input_tokens) - self.assertEqual(bedrock_runtime_attributes["gen_ai.usage.output_tokens"], output_tokens) - self.assertEqual(bedrock_runtime_attributes["gen_ai.response.finish_reasons"], [finish_reason]) - def _test_patched_bedrock_agent_instrumentation(self): """For bedrock-agent service, both extract_attributes and on_success provides attributes, the attributes depend on the API being invoked.""" @@ -586,6 +356,53 @@ def _test_patched_bedrock_agent_instrumentation(self): self.assertEqual(len(bedrock_agent_success_attributes), 1) self.assertEqual(bedrock_agent_success_attributes[attribute_tuple[0]], attribute_tuple[1]) + def _test_resource_detector_patches(self): + """Test that resource detector patches are applied and work correctly""" + # Test that the functions were patched + self.assertIsNotNone(ec2_resource._aws_http_request) + self.assertIsNotNone(eks_resource._aws_http_request) + + # Test EC2 patched function + with patch("amazon.opentelemetry.distro.patches._resource_detector_patches.urlopen") as mock_urlopen: + mock_response = MagicMock() + mock_response.read.return_value = b'{"test": "ec2-data"}' + mock_urlopen.return_value.__enter__.return_value = mock_response + + result = ec2_resource._aws_http_request("GET", "/test/path", {"X-Test": "header"}) + self.assertEqual(result, '{"test": "ec2-data"}') + + # Verify the request was made correctly + args, kwargs = mock_urlopen.call_args + request = args[0] + self.assertEqual(request.full_url, "http://169.254.169.254/test/path") + self.assertEqual(request.headers, {"X-test": "header"}) + self.assertEqual(kwargs["timeout"], 5) + + # Test EKS patched function + with patch("amazon.opentelemetry.distro.patches._resource_detector_patches.urlopen") as mock_urlopen, patch( + "amazon.opentelemetry.distro.patches._resource_detector_patches.ssl.create_default_context" + ) as mock_ssl: + mock_response = MagicMock() + mock_response.read.return_value = b'{"test": "eks-data"}' + mock_urlopen.return_value.__enter__.return_value = mock_response + + mock_context = MagicMock() + mock_ssl.return_value = mock_context + + result = eks_resource._aws_http_request("GET", "/api/v1/test", "Bearer token123") + self.assertEqual(result, '{"test": "eks-data"}') + + # Verify the request was made correctly + args, kwargs = mock_urlopen.call_args + request = args[0] + self.assertEqual(request.full_url, "https://kubernetes.default.svc/api/v1/test") + self.assertEqual(request.headers, {"Authorization": "Bearer token123"}) + self.assertEqual(kwargs["timeout"], 5) + self.assertEqual(kwargs["context"], mock_context) + + # Verify SSL context was created with correct CA file + mock_ssl.assert_called_once_with(cafile="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + def _reset_mocks(self): for method_patch in self.method_patches.values(): method_patch.reset_mock() @@ -678,6 +495,7 @@ def _do_on_success( ) -> Dict[str, str]: span_mock: Span = MagicMock() mock_call_context = MagicMock() + mock_instrumentor_context = MagicMock() span_attributes: Dict[str, str] = {} def set_side_effect(set_key, set_value): @@ -692,6 +510,6 @@ def set_side_effect(set_key, set_value): 
mock_call_context.params = params extension = _KNOWN_EXTENSIONS[service_name]()(mock_call_context) - extension.on_success(span_mock, result) + extension.on_success(span_mock, result, mock_instrumentor_context) return span_attributes diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py new file mode 100644 index 000000000..0839aec98 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py @@ -0,0 +1,96 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +import os +from importlib.metadata import PackageNotFoundError +from unittest import TestCase +from unittest.mock import patch + +from amazon.opentelemetry.distro._utils import AGENT_OBSERVABILITY_ENABLED, is_agent_observability_enabled, is_installed + + +class TestUtils(TestCase): + def setUp(self): + # Store original env var if it exists + self.original_env = os.environ.get(AGENT_OBSERVABILITY_ENABLED) + + def tearDown(self): + # Restore original env var + if self.original_env is not None: + os.environ[AGENT_OBSERVABILITY_ENABLED] = self.original_env + elif AGENT_OBSERVABILITY_ENABLED in os.environ: + del os.environ[AGENT_OBSERVABILITY_ENABLED] + + def test_is_installed_package_not_found(self): + """Test is_installed returns False when package is not found""" + with patch("amazon.opentelemetry.distro._utils.version") as mock_version: + # Simulate package not found + mock_version.side_effect = PackageNotFoundError("test-package") + + result = is_installed("test-package>=1.0.0") + self.assertFalse(result) + + def test_is_installed(self): + """Test is_installed returns True when version matches the specifier""" + with patch("amazon.opentelemetry.distro._utils.version") as mock_version: + # Package is installed and version matches requirement + mock_version.return_value = "2.5.0" + + # Test with compatible version requirement + result = is_installed("test-package>=2.0.0") + self.assertTrue(result) + + # Test with exact version match + mock_version.return_value = "1.0.0" + result = is_installed("test-package==1.0.0") + self.assertTrue(result) + + # Test with version range + mock_version.return_value = "1.5.0" + result = is_installed("test-package>=1.0,<2.0") + self.assertTrue(result) + + def test_is_installed_version_mismatch(self): + """Test is_installed returns False when version doesn't match""" + with patch("amazon.opentelemetry.distro._utils.version") as mock_version: + # Package is installed but version doesn't match requirement + mock_version.return_value = "1.0.0" + + # Test with incompatible version requirement + result = is_installed("test-package>=2.0.0") + self.assertFalse(result) + + def test_is_agent_observability_enabled_various_values(self): + """Test is_agent_observability_enabled with various environment variable values""" + # Test with "True" (uppercase) + os.environ[AGENT_OBSERVABILITY_ENABLED] = "True" + self.assertTrue(is_agent_observability_enabled()) + + # Test with "TRUE" (all caps) + os.environ[AGENT_OBSERVABILITY_ENABLED] = "TRUE" + self.assertTrue(is_agent_observability_enabled()) + + # Test with "true" (lowercase) + os.environ[AGENT_OBSERVABILITY_ENABLED] = "true" + self.assertTrue(is_agent_observability_enabled()) + + # Test with "false" + os.environ[AGENT_OBSERVABILITY_ENABLED] = "false" + self.assertFalse(is_agent_observability_enabled()) + + # Test with "False" + os.environ[AGENT_OBSERVABILITY_ENABLED] = 
"False" + self.assertFalse(is_agent_observability_enabled()) + + # Test with arbitrary string + os.environ[AGENT_OBSERVABILITY_ENABLED] = "yes" + self.assertFalse(is_agent_observability_enabled()) + + # Test with empty string + os.environ[AGENT_OBSERVABILITY_ENABLED] = "" + self.assertFalse(is_agent_observability_enabled()) + + # Test when env var is not set + if AGENT_OBSERVABILITY_ENABLED in os.environ: + del os.environ[AGENT_OBSERVABILITY_ENABLED] + self.assertFalse(is_agent_observability_enabled()) diff --git a/contract-tests/images/applications/botocore/botocore_server.py b/contract-tests/images/applications/botocore/botocore_server.py index 6c315a4dc..80ecbc6fe 100644 --- a/contract-tests/images/applications/botocore/botocore_server.py +++ b/contract-tests/images/applications/botocore/botocore_server.py @@ -435,7 +435,7 @@ def get_model_request_response(path): "inferenceConfig": { "max_new_tokens": 800, "temperature": 0.9, - "top_p": 0.7, + "topP": 0.7, }, } @@ -496,32 +496,6 @@ def get_model_request_response(path): "text": "test-generation-text", } - if "ai21.jamba" in path: - model_id = "ai21.jamba-1-5-large-v1:0" - - request_body = { - "messages": [ - { - "role": "user", - "content": prompt, - }, - ], - "top_p": 0.8, - "temperature": 0.6, - "max_tokens": 512, - } - - response_body = { - "stop_reason": "end_turn", - "usage": { - "prompt_tokens": 21, - "completion_tokens": 24, - }, - "choices": [ - {"finish_reason": "stop"}, - ], - } - if "mistral" in path: model_id = "mistral.mistral-7b-instruct-v0:2" diff --git a/contract-tests/images/applications/botocore/requirements.txt b/contract-tests/images/applications/botocore/requirements.txt index 25113e3f4..61ddebf98 100644 --- a/contract-tests/images/applications/botocore/requirements.txt +++ b/contract-tests/images/applications/botocore/requirements.txt @@ -1,5 +1,3 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 botocore==1.34.143 boto3==1.34.143 diff --git a/contract-tests/images/applications/django/requirements.txt b/contract-tests/images/applications/django/requirements.txt index 9b54a7736..84dfdeabb 100644 --- a/contract-tests/images/applications/django/requirements.txt +++ b/contract-tests/images/applications/django/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 django==5.0.11 diff --git a/contract-tests/images/applications/mysql-connector/requirements.txt b/contract-tests/images/applications/mysql-connector/requirements.txt index 9ca44d2e4..f285dcb1f 100644 --- a/contract-tests/images/applications/mysql-connector/requirements.txt +++ b/contract-tests/images/applications/mysql-connector/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 mysql-connector-python~=9.1.0 diff --git a/contract-tests/images/applications/mysqlclient/requirements.txt b/contract-tests/images/applications/mysqlclient/requirements.txt index 49c6b70f3..933e606b4 100644 --- a/contract-tests/images/applications/mysqlclient/requirements.txt +++ b/contract-tests/images/applications/mysqlclient/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 mysqlclient==2.2.4 diff --git a/contract-tests/images/applications/psycopg2/requirements.txt b/contract-tests/images/applications/psycopg2/requirements.txt index f2d278475..8786aff35 100644 --- 
a/contract-tests/images/applications/psycopg2/requirements.txt +++ b/contract-tests/images/applications/psycopg2/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 psycopg2==2.9.9 diff --git a/contract-tests/images/applications/pymysql/requirements.txt b/contract-tests/images/applications/pymysql/requirements.txt index ddda9b1fe..8ba76defb 100644 --- a/contract-tests/images/applications/pymysql/requirements.txt +++ b/contract-tests/images/applications/pymysql/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 pymysql==1.1.1 diff --git a/contract-tests/images/applications/requests/requirements.txt b/contract-tests/images/applications/requests/requirements.txt index 369049d22..700b31404 100644 --- a/contract-tests/images/applications/requests/requirements.txt +++ b/contract-tests/images/applications/requests/requirements.txt @@ -1,4 +1,2 @@ -opentelemetry-distro==0.46b0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 typing-extensions==4.12.2 requests~=2.0 diff --git a/contract-tests/images/mock-collector/pyproject.toml b/contract-tests/images/mock-collector/pyproject.toml index 422e2a5b1..42e13c868 100644 --- a/contract-tests/images/mock-collector/pyproject.toml +++ b/contract-tests/images/mock-collector/pyproject.toml @@ -11,9 +11,9 @@ requires-python = ">=3.9" dependencies = [ "grpcio ~= 1.66.0", - "opentelemetry-proto==1.25.0", - "opentelemetry-sdk==1.25.0", - "protobuf==4.25.2", + "opentelemetry-proto==1.33.1", + "opentelemetry-sdk==1.33.1", + "protobuf==5.26.1", "typing-extensions==4.12.2" ] diff --git a/contract-tests/images/mock-collector/requirements.txt b/contract-tests/images/mock-collector/requirements.txt index a0c5454cd..12e69148b 100644 --- a/contract-tests/images/mock-collector/requirements.txt +++ b/contract-tests/images/mock-collector/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.66.2 -opentelemetry-proto==1.25.0 -opentelemetry-sdk==1.25.0 -protobuf==4.25.2 +opentelemetry-proto==1.33.1 +opentelemetry-sdk==1.33.1 +protobuf==5.26.1 typing-extensions==4.12.2 diff --git a/contract-tests/tests/pyproject.toml b/contract-tests/tests/pyproject.toml index 0df6f6a1c..5c2895fab 100644 --- a/contract-tests/tests/pyproject.toml +++ b/contract-tests/tests/pyproject.toml @@ -10,8 +10,8 @@ license = "Apache-2.0" requires-python = ">=3.9" dependencies = [ - "opentelemetry-proto==1.25.0", - "opentelemetry-sdk==1.25.0", + "opentelemetry-proto==1.33.1", + "opentelemetry-sdk==1.33.1", "testcontainers==3.7.1", "grpcio==1.66.2", "docker==7.1.0", diff --git a/contract-tests/tests/test/amazon/botocore/botocore_test.py b/contract-tests/tests/test/amazon/botocore/botocore_test.py index ed04c9514..549ec3f50 100644 --- a/contract-tests/tests/test/amazon/botocore/botocore_test.py +++ b/contract-tests/tests/test/amazon/botocore/botocore_test.py @@ -440,7 +440,7 @@ def test_bedrock_runtime_invoke_model_amazon_titan(self): _GEN_AI_USAGE_INPUT_TOKENS: 15, _GEN_AI_USAGE_OUTPUT_TOKENS: 13, }, - span_name="Bedrock Runtime.InvokeModel", + span_name="text_completion amazon.titan-text-premier-v1:0", ) def test_bedrock_runtime_invoke_model_amazon_nova(self): @@ -458,6 +458,7 @@ def test_bedrock_runtime_invoke_model_amazon_nova(self): cloudformation_primary_identifier="amazon.nova-pro-v1:0", request_specific_attributes={ _GEN_AI_REQUEST_MODEL: "amazon.nova-pro-v1:0", + _GEN_AI_SYSTEM: "aws.bedrock", _GEN_AI_REQUEST_MAX_TOKENS: 800, 
                 _GEN_AI_REQUEST_TEMPERATURE: 0.9,
                 _GEN_AI_REQUEST_TOP_P: 0.7,
@@ -467,7 +468,7 @@ def test_bedrock_runtime_invoke_model_amazon_nova(self):
                 _GEN_AI_USAGE_INPUT_TOKENS: 432,
                 _GEN_AI_USAGE_OUTPUT_TOKENS: 681,
             },
-            span_name="Bedrock Runtime.InvokeModel",
+            span_name="chat amazon.nova-pro-v1:0",
         )
 
     def test_bedrock_runtime_invoke_model_anthropic_claude(self):
@@ -495,7 +496,7 @@ def test_bedrock_runtime_invoke_model_anthropic_claude(self):
                 _GEN_AI_USAGE_INPUT_TOKENS: 15,
                 _GEN_AI_USAGE_OUTPUT_TOKENS: 13,
             },
-            span_name="Bedrock Runtime.InvokeModel",
+            span_name="chat anthropic.claude-v2:1",
         )
 
     def test_bedrock_runtime_invoke_model_meta_llama(self):
@@ -523,7 +524,7 @@ def test_bedrock_runtime_invoke_model_meta_llama(self):
                 _GEN_AI_USAGE_INPUT_TOKENS: 31,
                 _GEN_AI_USAGE_OUTPUT_TOKENS: 49,
             },
-            span_name="Bedrock Runtime.InvokeModel",
+            span_name="chat meta.llama2-13b-chat-v1",
         )
 
     def test_bedrock_runtime_invoke_model_cohere_command(self):
@@ -553,35 +554,7 @@ def test_bedrock_runtime_invoke_model_cohere_command(self):
                 ),
                 _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6),
             },
-            span_name="Bedrock Runtime.InvokeModel",
-        )
-
-    def test_bedrock_runtime_invoke_model_ai21_jamba(self):
-        self.do_test_requests(
-            "bedrock/invokemodel/invoke-model/ai21.jamba-1-5-large-v1:0",
-            "GET",
-            200,
-            0,
-            0,
-            rpc_service="Bedrock Runtime",
-            remote_service="AWS::BedrockRuntime",
-            remote_operation="InvokeModel",
-            remote_resource_type="AWS::Bedrock::Model",
-            remote_resource_identifier="ai21.jamba-1-5-large-v1:0",
-            cloudformation_primary_identifier="ai21.jamba-1-5-large-v1:0",
-            request_specific_attributes={
-                _GEN_AI_REQUEST_MODEL: "ai21.jamba-1-5-large-v1:0",
-                _GEN_AI_SYSTEM: "aws.bedrock",
-                _GEN_AI_REQUEST_MAX_TOKENS: 512,
-                _GEN_AI_REQUEST_TEMPERATURE: 0.6,
-                _GEN_AI_REQUEST_TOP_P: 0.8,
-            },
-            response_specific_attributes={
-                _GEN_AI_RESPONSE_FINISH_REASONS: ["stop"],
-                _GEN_AI_USAGE_INPUT_TOKENS: 21,
-                _GEN_AI_USAGE_OUTPUT_TOKENS: 24,
-            },
-            span_name="Bedrock Runtime.InvokeModel",
+            span_name="chat cohere.command-r-v1:0",
         )
 
     def test_bedrock_runtime_invoke_model_mistral(self):
@@ -611,7 +584,7 @@ def test_bedrock_runtime_invoke_model_mistral(self):
                 ),
                 _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-output-text") / 6),
             },
-            span_name="Bedrock Runtime.InvokeModel",
+            span_name="chat mistral.mistral-7b-instruct-v0:2",
         )
 
     def test_bedrock_get_guardrail(self):

From b75fe99545d4ab092c66ef09f534f26c6c1d1644 Mon Sep 17 00:00:00 2001
From: liustve
Date: Fri, 20 Jun 2025 00:19:50 +0000
Subject: [PATCH 03/52] linting fix

---
 .../otlp/aws/logs/test_aws_batch_log_record_processor.py | 2 ++
 .../exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
index 346b44291..5f61f40f6 100644
--- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
@@ -1,3 +1,5 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
 import time
 import unittest
 from typing import List
diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py
index 82491bc01..ad0a1ddca 100644
--- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py
@@ -1,3 +1,5 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
 import time
 from unittest import TestCase
 from unittest.mock import patch

From d588605ed5b68504df2cd9a4a84d86718ee3bb0a Mon Sep 17 00:00:00 2001
From: liustve
Date: Fri, 20 Jun 2025 00:26:01 +0000
Subject: [PATCH 04/52] linting fix

---
 .../logs/aws_batch_log_record_processor.py | 4 +--
 .../test_aws_batch_log_record_processor.py | 16 ++++++------
 .../aws/logs/test_otlp_aws_logs_exporter.py | 25 ++++++++++---------
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py
index e57b03f3f..f12c9330f 100644
--- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py
@@ -87,7 +87,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None:
                     self._exporter.export(batch)
                 except Exception as exception:  # pylint: disable=broad-exception-caught
-                    _logger.exception("Exception while exporting logs: " + str(exception))
+                    _logger.exception("Exception while exporting logs: %s", exception)
                 detach(token)
@@ -145,7 +145,7 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int:
                         new_queue.append((content, current_depth + 1))
                     else:
                         _logger.debug(
-                            f"Max log depth of {depth} exceeded. Log data size will not be accurately calculated."
+                            "Max log depth of %s exceeded. Log data size will not be accurately calculated.", depth
                         )
             queue = new_queue
diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
index 5f61f40f6..8e639606c 100644
--- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py
@@ -99,9 +99,9 @@ def test_process_log_data_primitive(self):
         primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None]
         expected_sizes = [4, 4, 1, 3, 4, 5, 0]

-        for i in range(len(primitives)):
+        for index, primitive in enumerate(primitives):
             log = self.generate_test_log_data(
-                log_body=primitives[i],
+                log_body=primitive,
                 attr_key="",
                 attr_val="",
                 log_body_depth=-1,
@@ -109,7 +109,7 @@
                 count=1,
             )

-            expected_size = self.base_log_size + expected_sizes[i]
+            expected_size = self.base_log_size + expected_sizes[index]
             actual_size = self.processor._estimate_log_size(log[0])
             self.assertEqual(actual_size, expected_size)
@@ -254,9 +254,9 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___):
             4: 2,  # 5th batch (index 4) should have 2 logs
         }

-        for i, call in enumerate(batches):
+        for index, call in enumerate(batches):
             batch = call[0][0]
-            expected_size = expected_sizes[i]
+            expected_size = expected_sizes[index]
             self.assertEqual(len(batch), expected_size)

     @staticmethod
@@ -282,11 +282,11 @@ def generate_nested_value(depth, value, create_map=True) -> AnyValue:

         logs = []

-        for i in range(count):
+        for index in range(count):
             record = LogRecord(
                 timestamp=int(time.time_ns()),
-                trace_id=int(f"0x{i + 1:032x}", 16),
-                span_id=int(f"0x{i + 1:016x}", 16),
+                trace_id=int(f"0x{index + 1:032x}", 16),
+                span_id=int(f"0x{index + 1:016x}", 16),
                 trace_flags=TraceFlags(1),
                 severity_text="INFO",
                 severity_number=SeverityNumber.INFO,
diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py
index ad0a1ddca..5c4646612 100644
--- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py
+++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py
@@ -105,8 +105,8 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header

         delays = mock_sleep.call_args_list

-        for i in range(len(delays)):
-            self.assertEqual(delays[i][0][0], 2**i)
+        for i, delay in enumerate(delays):
+            self.assertEqual(delay[0][0], 2**i)

         # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays)
         self.assertEqual(mock_request.call_count, 7)
@@ -125,8 +125,8 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head
         result = self.exporter.export(self.logs)

         delays = mock_sleep.call_args_list

-        for i in range(len(delays)):
-            self.assertEqual(delays[i][0][0], 10)
+        for delay in delays:
+            self.assertEqual(delay[0][0], 10)

         self.assertEqual(mock_sleep.call_count, 3)
         self.assertEqual(mock_request.call_count, 4)
@@ -152,8 +152,8 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after
         result = self.exporter.export(self.logs)

         delays = mock_sleep.call_args_list

-        for i in range(len(delays)):
-            self.assertEqual(delays[i][0][0], 2**i)
+        for index, delay in enumerate(delays):
+            self.assertEqual(delay[0][0], 2**index)

         self.assertEqual(mock_sleep.call_count, 3)
         self.assertEqual(mock_request.call_count, 4)
@@ -167,18 +167,19 @@ def test_export_connection_error_retry(self, mock_request):
         self.assertEqual(mock_request.call_count, 2)
         self.assertEqual(result, LogExportResult.SUCCESS)

-    def generate_test_log_data(self, count=5):
+    @staticmethod
+    def generate_test_log_data(count=5):
         logs = []

-        for i in range(count):
+        for index in range(count):
             record = LogRecord(
                 timestamp=int(time.time_ns()),
-                trace_id=int(f"0x{i + 1:032x}", 16),
-                span_id=int(f"0x{i + 1:016x}", 16),
+                trace_id=int(f"0x{index + 1:032x}", 16),
+                span_id=int(f"0x{index + 1:016x}", 16),
                 trace_flags=TraceFlags(1),
                 severity_text="INFO",
                 severity_number=SeverityNumber.INFO,
-                body=f"Test log {i + 1}",
-                attributes={"test.attribute": f"value-{i + 1}"},
+                body=f"Test log {index + 1}",
+                attributes={"test.attribute": f"value-{index + 1}"},
             )

             log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"))

From c78aca5eeff097b9b662d4a3cde9b0f29e907aad Mon Sep 17 00:00:00 2001
From: liustve
Date: Fri, 20 Jun 2025 00:33:02 +0000
Subject: [PATCH 05/52] linting fix

---
 .../metrics/aws_cloudwatch_emf_exporter.py | 523 ------------------
 .../aws/logs/test_otlp_aws_logs_exporter.py | 4 +-
 .../test_aws_cloudwatch_emf_exporter.py | 2 +-
 3 files changed, 3 insertions(+), 526 deletions(-)
 delete mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py

diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py
deleted file mode 100644
index e2e364b03..000000000
--- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/aws_cloudwatch_emf_exporter.py
+++ /dev/null
@@ -1,523 +0,0 @@
-# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-# pylint: disable=no-self-use
-
-import json
-import logging
-import time
-import uuid
-from collections import defaultdict
-from typing import Any, Dict, List, Optional, Tuple
-
-import botocore.session
-from botocore.exceptions import ClientError
-
-from opentelemetry.sdk.metrics import (
-    Counter,
-    Histogram,
-    ObservableCounter,
-    ObservableGauge,
-    ObservableUpDownCounter,
-    UpDownCounter,
-)
-from opentelemetry.sdk.metrics._internal.point import Metric
-from opentelemetry.sdk.metrics.export import (
-    AggregationTemporality,
-    Gauge,
-    MetricExporter,
-    MetricExportResult,
-    MetricsData,
-    NumberDataPoint,
-)
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.util.types import Attributes
-
-logger = logging.getLogger(__name__)
-
-
-class MetricRecord:
-    """The metric data unified representation of all OTel metrics for OTel to CW EMF conversion."""
-
-    def __init__(self, metric_name: str, metric_unit: str, metric_description: str):
-        """
-        Initialize metric record.
- - Args: - metric_name: Name of the metric - metric_unit: Unit of the metric - metric_description: Description of the metric - """ - # Instrument metadata - self.name = metric_name - self.unit = metric_unit - self.description = metric_description - - # Will be set by conversion methods - self.timestamp: Optional[int] = None - self.attributes: Attributes = {} - - # Different metric type data - only one will be set per record - self.value: Optional[float] = None - self.sum_data: Optional[Any] = None - self.histogram_data: Optional[Any] = None - self.exp_histogram_data: Optional[Any] = None - - -class AwsCloudWatchEmfExporter(MetricExporter): - """ - OpenTelemetry metrics exporter for CloudWatch EMF format. - - This exporter converts OTel metrics into CloudWatch EMF logs which are then - sent to CloudWatch Logs. CloudWatch Logs automatically extracts the metrics - from the EMF logs. - - https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html - - """ - - # CloudWatch EMF supported units - # Ref: https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html - EMF_SUPPORTED_UNITS = { - "Seconds", - "Microseconds", - "Milliseconds", - "Bytes", - "Kilobytes", - "Megabytes", - "Gigabytes", - "Terabytes", - "Bits", - "Kilobits", - "Megabits", - "Gigabits", - "Terabits", - "Percent", - "Count", - "Bytes/Second", - "Kilobytes/Second", - "Megabytes/Second", - "Gigabytes/Second", - "Terabytes/Second", - "Bits/Second", - "Kilobits/Second", - "Megabits/Second", - "Gigabits/Second", - "Terabits/Second", - "Count/Second", - "None", - } - - # OTel to CloudWatch unit mapping - # Ref: opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/grouped_metric.go#L188 - UNIT_MAPPING = { - "1": "", - "ns": "", - "ms": "Milliseconds", - "s": "Seconds", - "us": "Microseconds", - "By": "Bytes", - "bit": "Bits", - } - - def __init__( - self, - namespace: str = "default", - log_group_name: str = None, - log_stream_name: Optional[str] = None, - aws_region: Optional[str] = None, - preferred_temporality: Optional[Dict[type, AggregationTemporality]] = None, - **kwargs, - ): - """ - Initialize the CloudWatch EMF exporter. 
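For reference, unit resolution in the exporter being deleted here is a two-step lookup over the tables above: pass a unit through when CloudWatch already accepts it, otherwise translate the OTel unit string. A minimal standalone sketch of that logic (tables abridged, names hypothetical):

    # Sketch of the two-step unit lookup mirrored from _get_unit below (tables abridged).
    EMF_SUPPORTED_UNITS = {"Seconds", "Milliseconds", "Bytes", "Percent", "Count", "None"}
    UNIT_MAPPING = {"1": "", "ns": "", "ms": "Milliseconds", "s": "Seconds", "us": "Microseconds", "By": "Bytes", "bit": "Bits"}

    def resolve_unit(otel_unit):
        if not otel_unit:
            return None
        if otel_unit in EMF_SUPPORTED_UNITS:
            return otel_unit                # already a valid CloudWatch unit
        return UNIT_MAPPING.get(otel_unit)  # OTel unit -> CloudWatch unit, else None

    assert resolve_unit("ms") == "Milliseconds"
    assert resolve_unit("Percent") == "Percent"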
- - Args: - namespace: CloudWatch namespace for metrics - log_group_name: CloudWatch log group name - log_stream_name: CloudWatch log stream name (auto-generated if None) - aws_region: AWS region (auto-detected if None) - preferred_temporality: Optional dictionary mapping instrument types to aggregation temporality - **kwargs: Additional arguments passed to botocore client - """ - # Set up temporality preference default to DELTA if customers not set - if preferred_temporality is None: - preferred_temporality = { - Counter: AggregationTemporality.DELTA, - Histogram: AggregationTemporality.DELTA, - ObservableCounter: AggregationTemporality.DELTA, - ObservableGauge: AggregationTemporality.DELTA, - ObservableUpDownCounter: AggregationTemporality.DELTA, - UpDownCounter: AggregationTemporality.DELTA, - } - - super().__init__(preferred_temporality) - - self.namespace = namespace - self.log_group_name = log_group_name - self.log_stream_name = log_stream_name or self._generate_log_stream_name() - - session = botocore.session.Session() - self.logs_client = session.create_client("logs", region_name=aws_region, **kwargs) - - # Ensure log group exists - self._ensure_log_group_exists() - - # Ensure log stream exists - self._ensure_log_stream_exists() - - # Default to unique log stream name matching OTel Collector - # EMF Exporter behavior with language for source identification - def _generate_log_stream_name(self) -> str: - """Generate a unique log stream name.""" - - unique_id = str(uuid.uuid4())[:8] - return f"otel-python-{unique_id}" - - def _ensure_log_group_exists(self): - """Ensure the log group exists, create if it doesn't.""" - try: - self.logs_client.create_log_group(logGroupName=self.log_group_name) - logger.info("Created log group: %s", self.log_group_name) - except ClientError as error: - if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException": - logger.debug("Log group %s already exists", self.log_group_name) - else: - logger.error("Failed to create log group %s : %s", self.log_group_name, error) - raise - - def _ensure_log_stream_exists(self): - try: - self.logs_client.create_log_stream(logGroupName=self.log_group_name, logStreamName=self.log_stream_name) - logger.info("Created log stream: %s", self.log_stream_name) - except ClientError as error: - if error.response.get("Error", {}).get("Code") == "ResourceAlreadyExistsException": - logger.debug("Log stream %s already exists", self.log_stream_name) - else: - logger.error("Failed to create log stream %s : %s", self.log_group_name, error) - raise - - def _get_metric_name(self, record: MetricRecord) -> Optional[str]: - """Get the metric name from the metric record or data point.""" - - try: - if record.name: - return record.name - except AttributeError: - pass - # Return None if no valid metric name found - return None - - def _get_unit(self, record: MetricRecord) -> Optional[str]: - """Get CloudWatch unit from MetricRecord unit.""" - unit = record.unit - - if not unit: - return None - - # First check if unit is already a supported EMF unit - if unit in self.EMF_SUPPORTED_UNITS: - return unit - - # Map from OTel unit to CloudWatch unit - mapped_unit = self.UNIT_MAPPING.get(unit) - - return mapped_unit - - def _get_dimension_names(self, attributes: Attributes) -> List[str]: - """Extract dimension names from attributes.""" - # Implement dimension selection logic - # For now, use all attributes as dimensions - return list(attributes.keys()) - - def _get_attributes_key(self, attributes: Attributes) -> str: - """ - 
Create a hashable key from attributes for grouping metrics. - - Args: - attributes: The attributes dictionary - - Returns: - A string representation of sorted attributes key-value pairs - """ - # Sort the attributes to ensure consistent keys - sorted_attrs = sorted(attributes.items()) - # Create a string representation of the attributes - return str(sorted_attrs) - - def _normalize_timestamp(self, timestamp_ns: int) -> int: - """ - Normalize a nanosecond timestamp to milliseconds for CloudWatch. - - Args: - timestamp_ns: Timestamp in nanoseconds - - Returns: - Timestamp in milliseconds - """ - # Convert from nanoseconds to milliseconds - return timestamp_ns // 1_000_000 - - def _create_metric_record(self, metric_name: str, metric_unit: str, metric_description: str) -> MetricRecord: - """ - Creates the intermediate metric data structure that standardizes different otel metric representation - and will be used to generate EMF events. The base record - establishes the instrument schema (name/unit/description) that will be populated - with dimensions, timestamps, and values during metric processing. - - Args: - metric_name: Name of the metric - metric_unit: Unit of the metric - metric_description: Description of the metric - - Returns: - A MetricRecord object - """ - return MetricRecord(metric_name, metric_unit, metric_description) - - def _convert_gauge(self, metric: Metric, data_point: NumberDataPoint) -> MetricRecord: - """Convert a Gauge metric datapoint to a metric record. - - Args: - metric: The metric object - data_point: The datapoint to convert - - Returns: - MetricRecord with populated timestamp, attributes, and value - """ - # Create base record - record = self._create_metric_record(metric.name, metric.unit, metric.description) - - # Set timestamp - try: - timestamp_ms = ( - self._normalize_timestamp(data_point.time_unix_nano) - if data_point.time_unix_nano is not None - else int(time.time() * 1000) - ) - except AttributeError: - # data_point doesn't have time_unix_nano attribute - timestamp_ms = int(time.time() * 1000) - record.timestamp = timestamp_ms - - # Set attributes - try: - record.attributes = data_point.attributes - except AttributeError: - # data_point doesn't have attributes - record.attributes = {} - - # For Gauge, set the value directly - try: - record.value = data_point.value - except AttributeError: - # data_point doesn't have value - record.value = None - - return record - - def _group_by_attributes_and_timestamp(self, record: MetricRecord) -> Tuple[str, int]: - """Group metric record by attributes and timestamp. - - Args: - record: The metric record - - Returns: - A tuple key for grouping - """ - # Create a key for grouping based on attributes - attrs_key = self._get_attributes_key(record.attributes) - return (attrs_key, record.timestamp) - - def _create_emf_log( - self, metric_records: List[MetricRecord], resource: Resource, timestamp: Optional[int] = None - ) -> Dict: - """ - Create EMF log dictionary from metric records. - - Since metric_records is already grouped by attributes, this function - creates a single EMF log for all records. 
- """ - # Start with base structure - emf_log = {"_aws": {"Timestamp": timestamp or int(time.time() * 1000), "CloudWatchMetrics": []}} - - # Set with latest EMF version schema - # opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/metric_translator.go#L414 - emf_log["Version"] = "1" - - # Add resource attributes to EMF log but not as dimensions - # OTel collector EMF Exporter has a resource_to_telemetry_conversion flag that will convert resource attributes - # as regular metric attributes(potential dimensions). However, for this SDK EMF implementation, - # we align with the OpenTelemetry concept that all metric attributes are treated as dimensions. - # And have resource attributes as just additional metadata in EMF, added otel.resource as prefix to distinguish. - if resource and resource.attributes: - for key, value in resource.attributes.items(): - emf_log[f"otel.resource.{key}"] = str(value) - - # Initialize collections for dimensions and metrics - metric_definitions = [] - # Collect attributes from all records (they should be the same for all records in the group) - # Only collect once from the first record and apply to all records - all_attributes = metric_records[0].attributes if metric_records and metric_records[0].attributes else {} - - # Process each metric record - for record in metric_records: - - metric_name = self._get_metric_name(record) - - # Skip processing if metric name is None or empty - if not metric_name: - continue - - # Skip processing if metric value is None or empty - if record.value is None: - logger.debug("Skipping metric %s as it does not have valid metric value", metric_name) - continue - - # Create metric data dict - metric_data = {"Name": metric_name} - - unit = self._get_unit(record) - if unit: - metric_data["Unit"] = unit - - # Add to metric definitions list - metric_definitions.append(metric_data) - - emf_log[metric_name] = record.value - - # Get dimension names from collected attributes - dimension_names = self._get_dimension_names(all_attributes) - - # Add attribute values to the root of the EMF log - for name, value in all_attributes.items(): - emf_log[name] = str(value) - - # Add the single dimension set to CloudWatch Metrics if we have dimensions and metrics - if dimension_names and metric_definitions: - emf_log["_aws"]["CloudWatchMetrics"].append( - {"Namespace": self.namespace, "Dimensions": [dimension_names], "Metrics": metric_definitions} - ) - - return emf_log - - # pylint: disable=no-member - def _send_log_event(self, log_event: Dict[str, Any]): - """ - Send a log event to CloudWatch Logs. - - Basic implementation for PR 1 - sends individual events directly. - - TODO: Batching event and follow CloudWatch Logs quato constraints - number of events & size limit per payload - """ - try: - # Send the log event - response = self.logs_client.put_log_events( - logGroupName=self.log_group_name, logStreamName=self.log_stream_name, logEvents=[log_event] - ) - - logger.debug("Successfully sent log event") - return response - - except ClientError as error: - logger.debug("Failed to send log event: %s", error) - raise - - # pylint: disable=too-many-nested-blocks - def export( - self, metrics_data: MetricsData, timeout_millis: Optional[int] = None, **kwargs: Any - ) -> MetricExportResult: - """ - Export metrics as EMF logs to CloudWatch. - - Groups metrics by attributes and timestamp before creating EMF logs. 
- - Args: - metrics_data: MetricsData containing resource metrics and scope metrics - timeout_millis: Optional timeout in milliseconds - **kwargs: Additional keyword arguments - - Returns: - MetricExportResult indicating success or failure - """ - try: - if not metrics_data.resource_metrics: - return MetricExportResult.SUCCESS - - # Process all metrics from all resource metrics and scope metrics - for resource_metrics in metrics_data.resource_metrics: - for scope_metrics in resource_metrics.scope_metrics: - # Dictionary to group metrics by attributes and timestamp - grouped_metrics = defaultdict(list) - - # Process all metrics in this scope - for metric in scope_metrics.metrics: - # Skip if metric.data is None or no data_points exists - try: - if not (metric.data and metric.data.data_points): - continue - except AttributeError: - # Metric doesn't have data or data_points attribute - continue - - # Process metrics based on type - metric_type = type(metric.data) - if metric_type == Gauge: - for dp in metric.data.data_points: - record = self._convert_gauge(metric, dp) - grouped_metrics[self._group_by_attributes_and_timestamp(record)].append(record) - else: - logger.debug("Unsupported Metric Type: %s", metric_type) - - # Now process each group separately to create one EMF log per group - for (_, timestamp_ms), metric_records in grouped_metrics.items(): - if not metric_records: - continue - - # Create and send EMF log for this batch of metrics - self._send_log_event( - { - "message": json.dumps( - self._create_emf_log(metric_records, resource_metrics.resource, timestamp_ms) - ), - "timestamp": timestamp_ms, - } - ) - - return MetricExportResult.SUCCESS - # pylint: disable=broad-exception-caught - # capture all types of exceptions to not interrupt the instrumented services - except Exception as error: - logger.error("Failed to export metrics: %s", error) - return MetricExportResult.FAILURE - - def force_flush(self, timeout_millis: int = 10000) -> bool: - """ - Force flush any pending metrics. - - TODO: will add logic to handle gracefule shutdown - - Args: - timeout_millis: Timeout in milliseconds - - Returns: - True if successful, False otherwise - """ - logger.debug("AwsCloudWatchEmfExporter force flushes the buffered metrics") - return True - - def shutdown(self, timeout_millis: Optional[int] = None, **kwargs: Any) -> bool: - """ - Shutdown the exporter. - Override to handle timeout and other keyword arguments, but do nothing. 
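A minimal wiring sketch for the exporter removed in this patch, assuming it is registered like any push-based MetricExporter (the reader and provider calls are standard SDK APIs; the exporter arguments are illustrative):

    # Hypothetical usage sketch; per the export() above, only Gauge data points
    # are converted in this revision, other metric types are skipped with a debug log.
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

    exporter = AwsCloudWatchEmfExporter(namespace="MyApp", log_group_name="/my/log/group")
    reader = PeriodicExportingMetricReader(exporter, export_interval_millis=60_000)
    provider = MeterProvider(metric_readers=[reader])
    meter = provider.get_meter("example")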
- - TODO: will add logic to handle gracefule shutdown - - Args: - timeout_millis: Ignored timeout in milliseconds - **kwargs: Ignored additional keyword arguments - """ - # Intentionally do nothing - self.force_flush(timeout_millis) - logger.debug("AwsCloudWatchEmfExporter shutdown called with timeout_millis=%s", timeout_millis) - return True diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 5c4646612..93f507916 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -105,8 +105,8 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header delays = mock_sleep.call_args_list - for i, delay in enumerate(delays): - self.assertEqual(delay[0][0], 2**i) + for index, delay in enumerate(delays): + self.assertEqual(delay[0][0], 2**index) # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays) self.assertEqual(mock_request.call_count, 7) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py index 3ea6031c3..01d500c70 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/metrics/test_aws_cloudwatch_emf_exporter.py @@ -8,7 +8,7 @@ from botocore.exceptions import ClientError -from amazon.opentelemetry.distro.exporter.otlp.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter +from amazon.opentelemetry.distro.exporter.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter from opentelemetry.sdk.metrics.export import Gauge, MetricExportResult from opentelemetry.sdk.resources import Resource From 12eca32e9a730ec01af0481760ee7ee668199916 Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:35:00 +0000 Subject: [PATCH 06/52] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index f12c9330f..40a8dad84 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -42,7 +42,7 @@ def __init__( self._exporter = exporter # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 - def _export(self, batch_strategy: BatchLogExportStrategy) -> None: + def _export(self, batch_strategy: BatchLogExportStrategy) -> None: # pylint: disable=too-many-nested-blocks """ Preserves existing batching behavior but will intermediarly export small log batches if the size of the data in the batch is at orabove AWS CloudWatch's maximum request 
size limit of 1 MB. From 83ec370c7650c5a49e1ec736b0cdef3ae313e398 Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:36:49 +0000 Subject: [PATCH 07/52] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 40a8dad84..066217498 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -42,7 +42,7 @@ def __init__( self._exporter = exporter # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 - def _export(self, batch_strategy: BatchLogExportStrategy) -> None: # pylint: disable=too-many-nested-blocks + def _export(self, batch_strategy: BatchLogExportStrategy) -> None: """ Preserves existing batching behavior but will intermediarly export small log batches if the size of the data in the batch is at orabove AWS CloudWatch's maximum request size limit of 1 MB. @@ -66,7 +66,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: # pylint: di log_data: LogData = self._queue.pop() log_size = self._estimate_log_size(log_data) - if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): + if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): # pylint: disable=too-many-nested-blocks # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: if self._is_gen_ai_log(batch[0]): From 79bbf464d4c123c9bb02d19f86f6b9f2adf9046b Mon Sep 17 00:00:00 2001 From: liustve Date: Fri, 20 Jun 2025 00:39:14 +0000 Subject: [PATCH 08/52] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 066217498..3e77e710f 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -66,7 +66,9 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: log_data: LogData = self._queue.pop() log_size = self._estimate_log_size(log_data) - if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): # pylint: disable=too-many-nested-blocks + if batch and ( + batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE + ): # pylint: disable=too-many-nested-blocks # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: if self._is_gen_ai_log(batch[0]): From b6e1b97fff4ea01c892192d9ebbcd0328e766835 Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 23 Jun 2025 22:27:17 +0000 Subject: [PATCH 09/52] remove gen ai handling logic --- .../logs/aws_batch_log_record_processor.py | 42 +++------- 
.../otlp/aws/logs/otlp_aws_logs_exporter.py | 79 +++---------------- .../test_aws_batch_log_record_processor.py | 42 ++-------- .../aws/logs/test_otlp_aws_logs_exporter.py | 18 ----- 4 files changed, 28 insertions(+), 153 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 3e77e710f..737463cf5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -19,7 +19,8 @@ class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) _MAX_LOG_REQUEST_BYTE_SIZE = ( - 1048576 # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html + 1048576 # Maximum uncompressed/unserialized bytes / request - + # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html ) def __init__( @@ -41,11 +42,15 @@ def __init__( self._exporter = exporter - # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 def _export(self, batch_strategy: BatchLogExportStrategy) -> None: """ + + Explictily overrides upstream _export method to add AWS CloudWatch size-based batching + See: + https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 + Preserves existing batching behavior but will intermediarly export small log batches if - the size of the data in the batch is at orabove AWS CloudWatch's maximum request size limit of 1 MB. + the size of the data in the batch is at or above AWS CloudWatch's maximum request size limit of 1 MB. 
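The batching rule this docstring describes reduces to a small loop; a standalone sketch under the stated 1 MB constraint, where estimate_size and export stand in for _estimate_log_size and the exporter:

    # Sketch of size-capped batching: flush before a log would push the batch over 1 MB.
    MAX_LOG_REQUEST_BYTE_SIZE = 1048576

    def export_in_capped_batches(logs, estimate_size, export):
        batch, batch_bytes = [], 0
        for log in logs:
            log_bytes = estimate_size(log)
            if batch and batch_bytes + log_bytes > MAX_LOG_REQUEST_BYTE_SIZE:
                export(batch)
                batch, batch_bytes = [], 0
            batch.append(log)
            batch_bytes += log_bytes
        if batch:
            export(batch)  # a single oversized log still ships as a batch of one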
- Data size of exported batches will ALWAYS be <= 1 MB except for the case below: - If the data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1 @@ -66,14 +71,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: log_data: LogData = self._queue.pop() log_size = self._estimate_log_size(log_data) - if batch and ( - batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE - ): # pylint: disable=too-many-nested-blocks - # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 - if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: - if self._is_gen_ai_log(batch[0]): - self._exporter.set_gen_ai_log_flag() - + if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE): self._exporter.export(batch) batch_data_size = 0 batch = [] @@ -82,11 +80,6 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: batch.append(log_data) if batch: - # if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1 - if batch_data_size > self._MAX_LOG_REQUEST_BYTE_SIZE: - if self._is_gen_ai_log(batch[0]): - self._exporter.set_gen_ai_log_flag() - self._exporter.export(batch) except Exception as exception: # pylint: disable=broad-exception-caught _logger.exception("Exception while exporting logs: %s", exception) @@ -97,7 +90,7 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. Will process complex log structures up to the specified depth limit. - If the depth limit of the log structure is exceeded, returns truncates calculation + If the depth limit of the log structure is exceeded, returns the truncated calculation to everything up to that point. Args: @@ -153,18 +146,3 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: queue = new_queue return size - - @staticmethod - def _is_gen_ai_log(log: LogData) -> bool: - """ - Is the log a Gen AI log event? 
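The size estimate the docstring above describes is, in essence, a breadth-limited walk of the log body and attributes that stops counting past the depth limit. A simplified stand-in (the traversal details are assumptions; the real method also adds _BASE_LOG_BUFFER_BYTE_SIZE for metadata and walks both body and attributes):

    # Simplified stand-in for the value walk inside _estimate_log_size.
    from collections.abc import Mapping, Sequence

    def estimate_value_size(value, depth=3):
        size, queue = 0, [(value, 0)]
        while queue:
            item, level = queue.pop()
            if item is None:
                continue
            if isinstance(item, (str, bytes)):
                size += len(item)
            elif isinstance(item, (int, float)):  # bool is an int subclass
                size += len(str(item))
            elif level < depth:
                if isinstance(item, Mapping):
                    for key, child in item.items():
                        size += len(key)
                        queue.append((child, level + 1))
                elif isinstance(item, Sequence):
                    queue.extend((child, level + 1) for child in item)
            # content beyond the depth limit is truncated from the estimate
        return size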
- """ - gen_ai_instrumentations = { - "openinference.instrumentation.langchain", - "openinference.instrumentation.crewai", - "opentelemetry.instrumentation.langchain", - "crewai.telemetry", - "openlit.otel.tracing", - } - - return log.instrumentation_scope.name in gen_ai_instrumentations diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 9bd75d03f..26ec07849 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -8,6 +8,7 @@ from typing import Dict, Optional, Sequence import requests +from requests.exceptions import ConnectionError from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator @@ -21,43 +22,8 @@ class OTLPAwsLogExporter(OTLPLogExporter): - """ - Below is the protobuf-JSON formatted path to "content" and "role" for the - following GenAI Consolidated Log Event Schema: - - "body": { - "output": { - "messages": [ - { - "content": "hi", - "role": "assistant" - } - ] - }, - "input": { - "messages": [ - { - "content": "hello", - "role": "user" - } - ] - } - } - - """ - - _LARGE_GEN_AI_LOG_PATH_HEADER = ( - "\\$['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['body']" # body - "['kvlistValue']['values'][*]['value']" # body['output'], body['input'] - "['kvlistValue']['values'][0]['value']" # body['output']['messages'], body['input']['messages'] - "['arrayValue']['values'][*]" # body['output']['messages'][0..999], body['input']['messages'][0..999] - "['kvlistValue']['values'][*]['value']['stringValue']" # body['output']['messages'][0..999]['content'/'role'], - # body['input']['messages'][0..999]['content'/'role'] - ) - - _LARGE_LOG_HEADER = "x-aws-truncatable-fields" - - _RETRY_AFTER_HEADER = "Retry-After" # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + + _RETRY_AFTER_HEADER = "Retry-After" # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling def __init__( self, @@ -86,24 +52,17 @@ def __init__( session=AwsAuthSession(aws_region=self._aws_region, service="logs"), ) - # https://github.com/open-telemetry/opentelemetry-python/blob/main/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 def export(self, batch: Sequence[LogData]) -> LogExportResult: """ - Exports the given batch of OTLP log data. - Behaviors of how this export will work - - - 1. Always compresses the serialized data into gzip before sending. + Exports log batch with AWS-specific enhancements over the base OTLPLogExporter. - 2. If self._gen_ai_log_flag is enabled, the log data is > 1 MB a - and the assumption is that the log is a normalized gen.ai LogEvent. - - inject the {LARGE_LOG_HEADER} into the header. + Based on upstream implementation which does not retry based on Retry-After header: + https://github.com/open-telemetry/opentelemetry-python/blob/acae2c232b101d3e447a82a7161355d66aa06fa2/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 - 3. 
Retry behavior is now the following: - - if the response contains a status code that is retryable and the response contains Retry-After in its - headers, the serialized data will be exported after that set delay - - - if the response does not contain that Retry-After header, default back to the current iteration of the - exponential backoff delay + Key behaviors: + 1. Always compresses data with gzip before sending + 2. Implements Retry-After header support for throttling responses """ if self._shutdown: @@ -111,11 +70,9 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: return LogExportResult.FAILURE serialized_data = encode_logs(batch).SerializeToString() - gzip_data = BytesIO() with gzip.GzipFile(fileobj=gzip_data, mode="w") as gzip_stream: gzip_stream.write(serialized_data) - data = gzip_data.getvalue() backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) @@ -132,10 +89,9 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: resp.status_code, resp.text, ) - self._gen_ai_log_flag = False return LogExportResult.FAILURE - # https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling maybe_retry_after = resp.headers.get(self._RETRY_AFTER_HEADER, None) # Set the next retry delay to the value of the Retry-After response in the headers. @@ -154,7 +110,6 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: "Logs will not be exported.", resp.reason, ) - self._gen_ai_log_flag = False return LogExportResult.FAILURE _logger.warning( @@ -165,28 +120,19 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: sleep(delay) - def set_gen_ai_log_flag(self): - """ - Sets a flag that indicates the current log batch contains - a generative AI log record that exceeds the CloudWatch Logs size limit (1MB). - """ - self._gen_ai_log_flag = True - def _send(self, serialized_data: bytes): try: response = self._session.post( url=self._endpoint, - headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, data=serialized_data, verify=self._certificate_file, timeout=self._timeout, cert=self._client_cert, ) return response - except requests.exceptions.ConnectionError: + except ConnectionError: response = self._session.post( url=self._endpoint, - headers={self._LARGE_LOG_HEADER: self._LARGE_GEN_AI_LOG_PATH_HEADER} if self._gen_ai_log_flag else None, data=serialized_data, verify=self._certificate_file, timeout=self._timeout, @@ -199,6 +145,7 @@ def _send(self, serialized_data: bytes): return response @staticmethod - def _retryable(resp: requests.Response) -> bool: + def _retryable(resp: Response) -> bool: """ Is it a retryable response?
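The delay selection this commit introduces, prefer a parseable Retry-After value and otherwise fall back to exponential backoff, reduces to a few lines. A sketch handling only the delta-seconds form of Retry-After, mirroring _parse_retryable_header returning -1 for anything unparseable:

    # Sketch: Retry-After wins when present and parseable, else exponential backoff.
    def next_delay_sec(retry_after_header, backoff_iter, max_timeout):
        if retry_after_header is not None:
            try:
                return float(retry_after_header)  # e.g. "Retry-After: 10"
            except ValueError:
                pass  # malformed header: fall through to backoff
        return next(backoff_iter, max_timeout)  # 1, 2, 4, ... capped at max_timeout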
""" + # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 8e639606c..365dd1c08 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -142,7 +142,6 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): self.assertEqual(len(self.processor._queue), 0) self.assertEqual(len(actual_batch), log_count) self.mock_exporter.export.assert_called_once() - self.mock_exporter.set_gen_ai_log_flag.assert_not_called() @patch( "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", @@ -151,37 +150,12 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): - """Should make multiple export calls of batch size 1 to export logs of size > 1 MB. - But should only call set_gen_ai_log_flag if it's a Gen AI log event.""" + """Should make multiple export calls of batch size 1 to export logs of size > 1 MB.""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) - non_gen_ai_test_logs = self.generate_test_log_data( - log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=3 + test_logs = self.generate_test_log_data( + log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=15 ) - gen_ai_test_logs = [] - - gen_ai_scopes = [ - "openinference.instrumentation.langchain", - "openinference.instrumentation.crewai", - "opentelemetry.instrumentation.langchain", - "crewai.telemetry", - "openlit.otel.tracing", - ] - - for gen_ai_scope in gen_ai_scopes: - gen_ai_test_logs.extend( - self.generate_test_log_data( - log_body=large_log_body, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=3, - instrumentation_scope=InstrumentationScope(gen_ai_scope, "1.0.0"), - ) - ) - - test_logs = gen_ai_test_logs + non_gen_ai_test_logs for log in test_logs: self.processor._queue.appendleft(log) @@ -189,8 +163,7 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): self.processor._export(batch_strategy=BatchLogExportStrategy.EXPORT_ALL) self.assertEqual(len(self.processor._queue), 0) - self.assertEqual(self.mock_exporter.export.call_count, 3 + len(gen_ai_test_logs)) - self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, len(gen_ai_test_logs)) + self.assertEqual(self.mock_exporter.export.call_count, len(test_logs)) batches = self.mock_exporter.export.call_args_list @@ -208,8 +181,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): large_log_body = "X" * (self.max_log_size + 1) small_log_body = "X" * (self.max_log_size // 10 - self.base_log_size) - gen_ai_scope = InstrumentationScope("openinference.instrumentation.langchain", "1.0.0") - large_logs = self.generate_test_log_data( 
log_body=large_log_body, attr_key="", @@ -217,7 +188,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): log_body_depth=-1, attr_depth=-1, count=3, - instrumentation_scope=gen_ai_scope, ) small_logs = self.generate_test_log_data( @@ -227,7 +197,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): log_body_depth=-1, attr_depth=-1, count=12, - instrumentation_scope=gen_ai_scope, ) # 1st, 2nd, 3rd batch = size 1 @@ -242,7 +211,6 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): self.assertEqual(len(self.processor._queue), 0) self.assertEqual(self.mock_exporter.export.call_count, 5) - self.assertEqual(self.mock_exporter.set_gen_ai_log_flag.call_count, 3) batches = self.mock_exporter.export.call_args_list @@ -294,7 +262,7 @@ def generate_nested_value(depth, value, create_map=True) -> AnyValue: attributes={attr_key: generate_nested_value(attr_depth, attr_val, create_map)}, ) - log_data = LogData(log_record=record, instrumentation_scope=instrumentation_scope) + log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) logs.append(log_data) return logs diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 93f507916..31e401643 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -56,24 +56,6 @@ def test_export_success(self, mock_request): self.assertTrue(len(data) >= 10) self.assertEqual(data[0:2], b"\x1f\x8b") - @patch("requests.Session.post", return_value=good_response) - def test_export_gen_ai_logs(self, mock_request): - """Tests that when set_gen_ai_log_flag is set, the exporter includes the LLO header in the request.""" - - self.exporter.set_gen_ai_log_flag() - - result = self.exporter.export(self.logs) - - mock_request.assert_called_once() - - _, kwargs = mock_request.call_args - headers = kwargs.get("headers", None) - - self.assertEqual(result, LogExportResult.SUCCESS) - self.assertIsNotNone(headers) - self.assertIn(self.exporter._LARGE_LOG_HEADER, headers) - self.assertEqual(headers[self.exporter._LARGE_LOG_HEADER], self.exporter._LARGE_GEN_AI_LOG_PATH_HEADER) - @patch("requests.Session.post", return_value=good_response) def test_should_not_export_if_shutdown(self, mock_request): """Tests that no export request is made if the exporter is shutdown.""" From 17d0f90864818c8206575e150d5a3d7b5a908155 Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 23 Jun 2025 22:28:51 +0000 Subject: [PATCH 10/52] fixed linting --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 737463cf5..1fe961e33 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -45,7 +45,7 @@ def __init__( def _export(self, 
batch_strategy: BatchLogExportStrategy) -> None: """ - Explictily overrides upstream _export method to add AWS CloudWatch size-based batching + Explicitly overrides upstream _export method to add AWS CloudWatch size-based batching See: https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 From 3d12858775fd4d3ebed6924fb0d41b64f53ea257 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 03:13:31 +0000 Subject: [PATCH 11/52] refactor _init_logging to 1.33.1 version --- .../distro/aws_opentelemetry_configurator.py | 57 +++++++++++-------- .../logs/aws_batch_log_record_processor.py | 1 - .../otlp/aws/logs/otlp_aws_logs_exporter.py | 9 ++- 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index e39c916c5..f62ed77da 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -1,9 +1,9 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. +import logging import os import re -from logging import NOTSET, Logger, getLogger from typing import ClassVar, Dict, List, Optional, Type, Union from importlib_metadata import version @@ -29,6 +29,7 @@ from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler from amazon.opentelemetry.distro.scope_based_exporter import ScopeBasedPeriodicExportingMetricReader from amazon.opentelemetry.distro.scope_based_filtering_view import ScopeBasedRetainingView +from opentelemetry._events import set_event_logger_provider from opentelemetry._logs import get_logger_provider, set_logger_provider from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter @@ -43,7 +44,9 @@ _import_id_generator, _import_sampler, _OTelSDKConfigurator, + _patch_basic_config, ) +from opentelemetry.sdk._events import EventLoggerProvider from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, LogExporter from opentelemetry.sdk.environment_variables import ( @@ -133,7 +136,7 @@ def _configure(self, **kwargs): # The OpenTelemetry Authors code # Long term, we wish to contribute this to upstream to improve initialization customizability and reduce dependency on # internal logic. 
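The refactored _init_logging in the next hunk boils down to the wiring sketched below, shown with a console exporter standing in for the customized OTLP one:

    # Sketch of the pipeline _init_logging (next hunk) assembles at runtime.
    import logging
    from opentelemetry._logs import set_logger_provider
    from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
    from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, ConsoleLogExporter

    provider = LoggerProvider()
    set_logger_provider(provider)
    provider.add_log_record_processor(BatchLogRecordProcessor(ConsoleLogExporter()))

    # With setup_logging_handler=True, stdlib logging is bridged into OTel:
    logging.getLogger().addHandler(LoggingHandler(level=logging.NOTSET, logger_provider=provider))
    logging.getLogger(__name__).info("routed through the OTel log pipeline")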
-def _initialize_components(): +def _initialize_components(setup_logging_handler: bool | None = None): trace_exporters, metric_exporters, log_exporters = _import_exporters( _get_exporter_names("traces"), _get_exporter_names("metrics"), @@ -170,38 +173,37 @@ def _initialize_components(): resource=resource, ) _init_metrics(metric_exporters, resource) - logging_enabled = os.getenv(_OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, "false") - if logging_enabled.strip().lower() == "true": - _init_logging(log_exporters, resource) + + if setup_logging_handler is None: + setup_logging_handler = ( + os.getenv(_OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, "false").strip().lower() == "true" + ) + _init_logging(log_exporters, resource, setup_logging_handler) def _init_logging( - exporters: Dict[str, Type[LogExporter]], - resource: Resource = None, + exporters: dict[str, Type[LogExporter]], + resource: Resource | None = None, + setup_logging_handler: bool = True, ): - - # Provides a default OTLP log exporter when it's not set. - # This is the behavior for the logs exporters for other languages. - logs_exporters = os.environ.get("OTEL_LOGS_EXPORTER") - - if not exporters and logs_exporters and logs_exporters.lower() != "none": - exporters = {"otlp": OTLPLogExporter} - provider = LoggerProvider(resource=resource) set_logger_provider(provider) for _, exporter_class in exporters.items(): - exporter_args: Dict[str, any] = {} - log_exporter = _customize_logs_exporter(exporter_class(**exporter_args), resource) + exporter_args = {} + log_exporter: LogExporter = _customize_logs_exporter(exporter_class(**exporter_args)) + log_processor = _customize_log_record_processor(log_exporter) + provider.add_log_record_processor(log_processor) - if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): - provider.add_log_record_processor(AwsBatchLogRecordProcessor(exporter=log_exporter)) - else: - provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter)) + event_logger_provider = EventLoggerProvider(logger_provider=provider) + set_event_logger_provider(event_logger_provider) - handler = LoggingHandler(level=NOTSET, logger_provider=provider) + if setup_logging_handler: + _patch_basic_config() - getLogger().addHandler(handler) + # Add OTel handler + handler = LoggingHandler(level=logging.NOTSET, logger_provider=provider) + logging.getLogger().addHandler(handler) def _init_tracing( @@ -390,7 +392,14 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> return AwsMetricAttributesSpanExporterBuilder(span_exporter, resource).build() -def _customize_logs_exporter(log_exporter: LogExporter, resource: Resource) -> LogExporter: +def _customize_log_record_processor(log_exporter: LogExporter): + if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): + return AwsBatchLogRecordProcessor(exporter=log_exporter) + + return BatchLogRecordProcessor(exporter=log_exporter) + + +def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: logs_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_LOGS_ENDPOINT) if _is_aws_otlp_endpoint(logs_endpoint, "logs"): diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 1fe961e33..08a99ced5 100644 --- 
a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -44,7 +44,6 @@ def __init__( def _export(self, batch_strategy: BatchLogExportStrategy) -> None: """ - Explicitly overrides upstream _export method to add AWS CloudWatch size-based batching See: https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143 diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 26ec07849..fd9830d9a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -7,7 +7,7 @@ from time import sleep from typing import Dict, Optional, Sequence -import requests +from requests import Response from requests.exceptions import ConnectionError from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession @@ -34,7 +34,6 @@ def __init__( headers: Optional[Dict[str, str]] = None, timeout: Optional[int] = None, ): - self._gen_ai_log_flag = False self._aws_region = None if endpoint: @@ -77,6 +76,10 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) + # This loop will eventually exit via one of three conditions: + # 1. Successful response (resp.ok) + # 2. Non-retryable error (4xx status codes except 429) + # 3. Retry exponential backoff timeout exhausted and no Retry-After header available while True: resp = self._send(data) @@ -141,7 +144,7 @@ def _send(self, serialized_data: bytes): return response @staticmethod - def _retryable(resp: requests.Response) -> bool: + def _retryable(resp: Response) -> bool: """ Is it a retryable response? 
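The loop-exit comments added above lean on the upstream backoff helper. A minimal stand-in, assuming it yields doubling delays below max_value, which matches the 1, 2, 4, 8, 16, 32 sequence asserted in the exporter tests earlier in this series:

    # Hypothetical stand-in for _create_exp_backoff_generator (assumed behavior).
    def exp_backoff(max_value):
        delay = 1
        while delay < max_value:
            yield delay
            delay *= 2

    # export() treats next(backoff, max_value) == max_value as "retries exhausted".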
""" From 7f90bc79de823b562d6176606fc1e2d9f37fdd33 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 03:53:45 +0000 Subject: [PATCH 12/52] refactored batch log record processor --- .../distro/aws_opentelemetry_configurator.py | 6 +- .../logs/aws_batch_log_record_processor.py | 15 ++++- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 66 ++++++++++--------- .../test_aws_batch_log_record_processor.py | 4 +- 4 files changed, 54 insertions(+), 37 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index f62ed77da..aa181de43 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -22,7 +22,7 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsBatchLogRecordProcessor +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsCloudWatchOtlpBatchLogRecordProcessor from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter @@ -106,7 +106,7 @@ # UDP package size is not larger than 64KB LAMBDA_SPAN_EXPORT_BATCH_SIZE = 10 -_logger: Logger = getLogger(__name__) +_logger: logging.Logger = logging.getLogger(__name__) class AwsOpenTelemetryConfigurator(_OTelSDKConfigurator): @@ -394,7 +394,7 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> def _customize_log_record_processor(log_exporter: LogExporter): if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): - return AwsBatchLogRecordProcessor(exporter=log_exporter) + return AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=log_exporter) return BatchLogRecordProcessor(exporter=log_exporter) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 08a99ced5..dc81875bc 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -14,7 +14,20 @@ _logger = logging.getLogger(__name__) -class AwsBatchLogRecordProcessor(BatchLogRecordProcessor): +class AwsCloudWatchOtlpBatchLogRecordProcessor(BatchLogRecordProcessor): + """ + Custom implementation of BatchLogRecordProcessor that manages log record batching + with size-based constraints to prevent exceeding AWS CloudWatch Logs OTLP endpoint request size limits. 
+ + This processor still exports all logs up to _max_export_batch_size but rather than doing exactly + one export, we will estimate log sizes and do multiple batch exports + where each exported batch will have an additonal constraint: + + If the batch to be exported will have a data size of > 1 MB: + The batch will be split into multiple exports of sub-batches of data size <= 1 MB. + + A unique case is if the sub-batch is of data size > 1 MB, then the sub-batch will have exactly 1 log in it. + """ _BASE_LOG_BUFFER_BYTE_SIZE = ( 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index fd9830d9a..8ce8f1a8b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -9,6 +9,7 @@ from requests import Response from requests.exceptions import ConnectionError +from requests.structures import CaseInsensitiveDict from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator @@ -76,43 +77,28 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) - # This loop will eventually exit via one of three conditions: - # 1. Successful response (resp.ok) - # 2. Non-retryable error (4xx status codes except 429) - # 3. Retry exponential backoff timeout exhausted and no Retry-After header available while True: resp = self._send(data) if resp.ok: return LogExportResult.SUCCESS - if not self._retryable(resp): - _logger.error( - "Failed to export logs batch code: %s, reason: %s", - resp.status_code, - resp.text, - ) - return LogExportResult.FAILURE - - # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling - maybe_retry_after = resp.headers.get(self._RETRY_AFTER_HEADER, None) - - # Set the next retry delay to the value of the Retry-After response in the headers. - # If Retry-After is not present in the headers, default to the next iteration of the - # exponential backoff strategy. - - delay = self._parse_retryable_header(maybe_retry_after) - - if delay == -1: - delay = next(backoff, self._MAX_RETRY_TIMEOUT) - - if delay == self._MAX_RETRY_TIMEOUT: - _logger.error( - "Transient error %s encountered while exporting logs batch. " - "No Retry-After header found and all backoff retries exhausted. 
" - "Logs will not be exported.", - resp.reason, - ) + delay = self._get_retry_delay_sec(resp.headers, backoff) + is_retryable = self._retryable(resp) + + if not is_retryable or delay == self._MAX_RETRY_TIMEOUT: + if is_retryable: + _logger.error( + "Failed to export logs due to retries exhausted " + "after transient error %s encountered while exporting logs batch", + resp.reason, + ) + else: + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) return LogExportResult.FAILURE _logger.warning( @@ -152,6 +138,24 @@ def _retryable(resp: Response) -> bool: return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) + def _get_retry_delay_sec(self, headers: CaseInsensitiveDict, backoff) -> float: + """ + Get retry delay in seconds from headers or backoff strategy. + """ + # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling + maybe_retry_after = headers.get(self._RETRY_AFTER_HEADER, None) + + # Set the next retry delay to the value of the Retry-After response in the headers. + # If Retry-After is not present in the headers, default to the next iteration of the + # exponential backoff strategy. + + delay = self._parse_retryable_header(maybe_retry_after) + + if delay == -1: + delay = next(backoff, self._MAX_RETRY_TIMEOUT) + + return delay + @staticmethod def _parse_retryable_header(retry_header: Optional[str]) -> float: """ diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 365dd1c08..62673c566 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -6,7 +6,7 @@ from unittest.mock import MagicMock, patch from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( - AwsBatchLogRecordProcessor, + AwsCloudWatchOtlpBatchLogRecordProcessor, BatchLogExportStrategy, ) from opentelemetry._logs.severity import SeverityNumber @@ -23,7 +23,7 @@ def setUp(self): self.mock_exporter = MagicMock() self.mock_exporter.export.return_value = LogExportResult.SUCCESS - self.processor = AwsBatchLogRecordProcessor(exporter=self.mock_exporter) + self.processor = AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=self.mock_exporter) self.max_log_size = self.processor._MAX_LOG_REQUEST_BYTE_SIZE self.base_log_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE From 4b7bb0e12dabf7232658c28a1ce5fa846a8d4cca Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 03:59:05 +0000 Subject: [PATCH 13/52] linting --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 4 +++- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index aa181de43..f6ce7fb57 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -22,7 +22,9 @@ AwsMetricAttributesSpanExporterBuilder, ) from 
amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import AwsCloudWatchOtlpBatchLogRecordProcessor +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsCloudWatchOtlpBatchLogRecordProcessor, +) from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index dc81875bc..0568c9296 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -21,13 +21,14 @@ class AwsCloudWatchOtlpBatchLogRecordProcessor(BatchLogRecordProcessor): This processor still exports all logs up to _max_export_batch_size but rather than doing exactly one export, we will estimate log sizes and do multiple batch exports - where each exported batch will have an additonal constraint: + where each exported batch will have an additional constraint: If the batch to be exported will have a data size of > 1 MB: The batch will be split into multiple exports of sub-batches of data size <= 1 MB. A unique case is if the sub-batch is of data size > 1 MB, then the sub-batch will have exactly 1 log in it. """ + _BASE_LOG_BUFFER_BYTE_SIZE = ( 1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation ) From 8c64adbdda1c3e5536ec5dfcee4629b292a56e81 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:00:57 +0000 Subject: [PATCH 14/52] lint fix --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index f6ce7fb57..863ca0dff 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -138,7 +138,7 @@ def _configure(self, **kwargs): # The OpenTelemetry Authors code # Long term, we wish to contribute this to upstream to improve initialization customizability and reduce dependency on # internal logic. 
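A note on the typing fixes in the hunks around this point: PEP 604 union syntax such as bool | None is evaluated when the function is defined and raises TypeError on Python 3.9 and earlier, so Optional[bool] keeps the distro importable on those runtimes. A minimal sketch of the difference (the function name below is illustrative, not from the patch):

from typing import Optional

def configure(setup_logging_handler: Optional[bool] = None) -> None:
    # Valid on Python 3.8+: Optional works wherever typing is available.
    ...

# def configure(setup_logging_handler: bool | None = None): ...
# The variant above raises TypeError at definition time on Python <= 3.9
# unless `from __future__ import annotations` defers annotation evaluation.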
-def _initialize_components(setup_logging_handler: bool | None = None): +def _initialize_components(setup_logging_handler: Optional[bool] = None): trace_exporters, metric_exporters, log_exporters = _import_exporters( _get_exporter_names("traces"), _get_exporter_names("metrics"), From 01e3fd8da3704cea4e1cc90b6a6f54e3b96a0b0a Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:18:59 +0000 Subject: [PATCH 15/52] update configuration and tests --- .../distro/aws_opentelemetry_configurator.py | 2 +- .../distro/test_aws_opentelementry_configurator.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 863ca0dff..b9fe22afd 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -185,7 +185,7 @@ def _initialize_components(setup_logging_handler: Optional[bool] = None): def _init_logging( exporters: dict[str, Type[LogExporter]], - resource: Resource | None = None, + resource: Optional[Resource] = None, setup_logging_handler: bool = True, ): provider = LoggerProvider(resource=resource) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index dbaee3c33..5b81be9f8 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -496,6 +496,7 @@ def test_customize_span_exporter_sigv4(self): OTLPAwsSpanExporter, AwsAuthSession, Compression.NoCompression, + Resource.get_empty(), ) for config in bad_configs: @@ -506,6 +507,7 @@ def test_customize_span_exporter_sigv4(self): OTLPSpanExporter, Session, Compression.NoCompression, + Resource.get_empty(), ) self.assertIsInstance( @@ -610,12 +612,12 @@ def test_customize_logs_exporter_sigv4(self): ) self.assertIsInstance( - _customize_logs_exporter(OTLPGrpcLogExporter(), Resource.get_empty()), OTLPGrpcLogExporter + _customize_logs_exporter(OTLPGrpcLogExporter()), OTLPGrpcLogExporter ) # Need to patch all of these to prevent some weird multi-threading error with the LogProvider @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.LoggingHandler", return_value=MagicMock()) - @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.getLogger", return_value=MagicMock()) + @patch("logging.getLogger", return_value=MagicMock()) @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._customize_logs_exporter") @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.LoggerProvider", return_value=MagicMock()) @patch( @@ -832,12 +834,13 @@ def customize_exporter_test( expected_exporter_type, expected_session, expected_compression, + *args ): for key, value in config.items(): os.environ[key] = value try: - result = executor(default_exporter, Resource.get_empty()) + result = executor(default_exporter, *args) self.assertIsInstance(result, expected_exporter_type) self.assertIsInstance(result._session, expected_session) self.assertEqual(result._compression, expected_compression) From 2f0268cf5fd92ef4193998674b32ce18b9e4c5eb Mon Sep 17 00:00:00 2001 From: 
liustve Date: Tue, 24 Jun 2025 04:21:09 +0000 Subject: [PATCH 16/52] lint fix --- .../distro/test_aws_opentelementry_configurator.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 5b81be9f8..5bd05677d 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -611,9 +611,7 @@ def test_customize_logs_exporter_sigv4(self): config, _customize_logs_exporter, OTLPLogExporter(), OTLPLogExporter, Session, Compression.NoCompression ) - self.assertIsInstance( - _customize_logs_exporter(OTLPGrpcLogExporter()), OTLPGrpcLogExporter - ) + self.assertIsInstance(_customize_logs_exporter(OTLPGrpcLogExporter()), OTLPGrpcLogExporter) # Need to patch all of these to prevent some weird multi-threading error with the LogProvider @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.LoggingHandler", return_value=MagicMock()) @@ -827,14 +825,7 @@ def test_customize_metric_exporter(self): os.environ.pop("OTEL_METRIC_EXPORT_INTERVAL", None) def customize_exporter_test( - self, - config, - executor, - default_exporter, - expected_exporter_type, - expected_session, - expected_compression, - *args + self, config, executor, default_exporter, expected_exporter_type, expected_session, expected_compression, *args ): for key, value in config.items(): os.environ[key] = value From 7dbcb7e9e6721872fab2816415129939e22f2996 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 24 Jun 2025 04:26:10 +0000 Subject: [PATCH 17/52] linting fix --- .../distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 8ce8f1a8b..845a80ecb 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -8,7 +8,7 @@ from typing import Dict, Optional, Sequence from requests import Response -from requests.exceptions import ConnectionError +from requests.exceptions import ConnectionError as RequestsConnectionError from requests.structures import CaseInsensitiveDict from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession @@ -119,7 +119,7 @@ def _send(self, serialized_data: bytes): cert=self._client_cert, ) return response - except ConnectionError: + except RequestsConnectionError: response = self._session.post( url=self._endpoint, data=serialized_data, From 48258c3afc7bd9250bdf855ae184b5eb46d6435b Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 17:17:02 +0000 Subject: [PATCH 18/52] linting fix --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index bebaf830d..a861ae3db 100644 --- 
a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -4,7 +4,7 @@ import logging import os import re -from logging import NOTSET, Logger, getLogger +from logging import Logger, getLogger from typing import ClassVar, Dict, List, NamedTuple, Optional, Type, Union from importlib_metadata import version From 57bc7720e8a080e24856d58519b0b8581f06b1e9 Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 17:43:38 +0000 Subject: [PATCH 19/52] add cycle detection --- .../logs/aws_batch_log_record_processor.py | 23 +++++++++++++++---- .../test_aws_batch_log_record_processor.py | 18 ++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 0568c9296..41ada5926 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -103,7 +103,8 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. Will process complex log structures up to the specified depth limit. - If the depth limit of the log structure is exceeded, returns the truncated calculation + Includes cycle detection to prevent processing the same complex log content (Maps, Arrays) + more than once. If the depth limit of the log structure is exceeded, returns the truncated calculation to everything up to that point. Args: @@ -114,9 +115,13 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: int: The estimated size of the log object in bytes """ - # Use a queue to prevent excessive recursive calls. - # We calculate based on the size of the log record body and attributes for the log. 
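For context on the traversal being reworked in this hunk, a simplified standalone sketch of the queue-based size estimate (an approximation, not the distro's exact code: it omits the base buffer and the 1 MB early exit):

from collections.abc import Mapping, Sequence
from typing import Any

def estimate_size(body: Any, max_depth: int = 3) -> int:
    size = 0
    queue = [(body, 0)]  # (value, depth) pairs, processed iteratively
    visited = set()      # ids of containers already counted (cycle guard)
    while queue:
        value, depth = queue.pop()
        if value is None:
            continue
        if isinstance(value, bool):    # must precede the int check, bool subclasses int
            size += 4 if value else 5  # len("true") / len("false")
        elif isinstance(value, (str, bytes)):
            size += len(value)
        elif isinstance(value, (int, float)):
            size += len(str(value))
        elif depth <= max_depth and id(value) not in visited:
            visited.add(id(value))
            if isinstance(value, Mapping):
                for key, item in value.items():
                    size += len(key)
                    queue.append((item, depth + 1))
            elif isinstance(value, Sequence):
                queue.extend((item, depth + 1) for item in value)
    return size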
- queue: List[tuple[AnyValue, int]] = [(log.log_record.body, 0), (log.log_record.attributes, -1)] + # Queue is a list of (log_content, depth) where: + # log_content is the current piece of log data being processed + # depth tracks how many levels deep we've traversed to reach this data + queue = [(log.log_record.body, 0), (log.log_record.attributes, -1)] + + # Track visited objects to avoid calculating the same complex log content more than once + visited = set() size: int = self._BASE_LOG_BUFFER_BYTE_SIZE @@ -130,6 +135,9 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: next_val, current_depth = data + if not next_val: + continue + if isinstance(next_val, (str, bytes)): size += len(next_val) continue @@ -142,7 +150,14 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: size += len(str(next_val)) continue + # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"], if current_depth <= depth: + # Guaranteed to be unique, see: https://www.w3schools.com/python/ref_func_id.asp + obj_id = id(next_val) + if obj_id in visited: + continue + visited.add(obj_id) + if isinstance(next_val, Sequence): for content in next_val: new_queue.append((cast(AnyValue, content), current_depth + 1)) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 62673c566..20377b83a 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -114,6 +114,23 @@ def test_process_log_data_primitive(self): self.assertEqual(actual_size, expected_size) + def test_process_log_data_with_cycle(self): + """Test that processor handles processing logs with circular references only once""" + cyclic_dict: dict = {"data": "test"} + cyclic_dict["self_ref"] = cyclic_dict + + log = self.generate_test_log_data( + log_body=cyclic_dict, + attr_key="", + attr_val="", + log_body_depth=-1, + attr_depth=-1, + count=1, + ) + expected_size = self.base_log_size + len("data") + len("self_ref") + len("test") + actual_size = self.processor._estimate_log_size(log[0]) + self.assertEqual(actual_size, expected_size) + @patch( "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", return_value=MagicMock(), @@ -236,7 +253,6 @@ def generate_test_log_data( attr_depth=3, count=5, create_map=True, - instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"), ) -> List[LogData]: def generate_nested_value(depth, value, create_map=True) -> AnyValue: From 7da6c75b910b8a2c9f24cac2d42ac003122254cc Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 18:42:48 +0000 Subject: [PATCH 20/52] add cycle detection unit tests --- .../logs/aws_batch_log_record_processor.py | 30 +-- .../test_aws_batch_log_record_processor.py | 176 ++++++++---------- 2 files changed, 97 insertions(+), 109 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 41ada5926..cea4a5404 100644 --- 
a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -103,8 +103,8 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. Will process complex log structures up to the specified depth limit. - Includes cycle detection to prevent processing the same complex log content (Maps, Arrays) - more than once. If the depth limit of the log structure is exceeded, returns the truncated calculation + Includes cycle detection to prevent processing the log content more than once. + If the depth limit of the log structure is exceeded, returns the truncated calculation to everything up to that point. Args: @@ -115,12 +115,15 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: int: The estimated size of the log object in bytes """ - # Queue is a list of (log_content, depth) where: - # log_content is the current piece of log data being processed - # depth tracks how many levels deep we've traversed to reach this data + # Queue contains tuples of (log_content, depth) where: + # - log_content is the current piece of log data being processed + # - depth tracks how many levels deep we've traversed to reach this content + # - body starts at depth 0 since it's an AnyValue object + # - Attributes start at depth -1 since it's a Mapping[str, AnyValue] - when traversed, we will + # start processing its keys at depth 0 queue = [(log.log_record.body, 0), (log.log_record.attributes, -1)] - # Track visited objects to avoid calculating the same complex log content more than once + # Track visited complex log contents to avoid calculating the same one more than once visited = set() size: int = self._BASE_LOG_BUFFER_BYTE_SIZE @@ -135,25 +138,26 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: next_val, current_depth = data - if not next_val: - continue - - if isinstance(next_val, (str, bytes)): - size += len(next_val) + if next_val is None: continue if isinstance(next_val, bool): size += 4 if next_val else 5 continue + if isinstance(next_val, (str, bytes)): + size += len(next_val) + continue + if isinstance(next_val, (float, int)): size += len(str(next_val)) continue # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"], if current_depth <= depth: - # Guaranteed to be unique, see: https://www.w3schools.com/python/ref_func_id.asp - obj_id = id(next_val) + obj_id = id( + next_val + ) # Guaranteed to be unique, see: https://www.w3schools.com/python/ref_func_id.asp if obj_id in visited: continue visited.add(obj_id) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 20377b83a..a6fc7baa3 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -24,75 +24,95 @@ def setUp(self): self.mock_exporter.export.return_value = LogExportResult.SUCCESS self.processor = 
AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=self.mock_exporter) - self.max_log_size = self.processor._MAX_LOG_REQUEST_BYTE_SIZE - self.base_log_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE def test_process_log_data_nested_structure(self): """Tests that the processor correctly handles nested structures (dict/list)""" - message_size = 400 - message = "X" * message_size + log_body = "X" * 400 + log_key = "test" + log_depth = 2 - nest_dict_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=True + nested_dict_log = self.generate_test_log_data( + log_body=log_body, log_key=log_key, log_body_depth=log_depth, count=1, create_map=True ) - nest_array_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=False + nested_array_log = self.generate_test_log_data( + log_body=log_body, log_key=log_key, log_body_depth=log_depth, count=1, create_map=False ) - expected_size = self.base_log_size + message_size * 2 + expected_dict_size = len(log_key) * log_depth + len(log_body) + expected_array_size = len(log_body) + + dict_size = self.processor._estimate_log_size(log=nested_dict_log[0], depth=log_depth) + array_size = self.processor._estimate_log_size(log=nested_array_log[0], depth=log_depth) + + self.assertEqual(dict_size - self.processor._BASE_LOG_BUFFER_BYTE_SIZE, expected_dict_size) + self.assertEqual(array_size - self.processor._BASE_LOG_BUFFER_BYTE_SIZE, expected_array_size) + + def test_process_log_data_with_attributes(self): + """Tests that the processor correctly handles both body and attributes""" + log_body = "test_body" + attr_key = "attr_key" + attr_value = "attr_value" + + record = LogRecord( + timestamp=int(time.time_ns()), + trace_id=0x123456789ABCDEF0123456789ABCDEF0, + span_id=0x123456789ABCDEF0, + trace_flags=TraceFlags(1), + severity_text="INFO", + severity_number=SeverityNumber.INFO, + body=log_body, + attributes={attr_key: attr_value}, + ) + log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) - dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=2) - array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=2) + expected_size = len(log_body) + len(attr_key) + len(attr_value) + actual_size = self.processor._estimate_log_size(log_data) - # Asserting almost equal to account for dictionary keys in the Log object - self.assertAlmostEqual(dict_size, expected_size, delta=10) - self.assertAlmostEqual(array_size, expected_size, delta=10) + self.assertEqual(actual_size - self.processor._BASE_LOG_BUFFER_BYTE_SIZE, expected_size) def test_process_log_data_nested_structure_exceeds_depth(self): """Tests that the processor cuts off calculation for nested structure that exceeds the depth limit""" - calculated = "X" * 400 - message = {"calculated": calculated, "truncated": {"truncated": {"test": "X" * self.max_log_size}}} - - # *2 since we set this message in both body and attributes - expected_size = self.base_log_size + (len("calculated") + len(calculated) + len("truncated")) * 2 + max_depth = 0 + calculated_body = "X" * 400 + log_body = { + "calculated": "X" * 400, + "restOfThisLogWillBeTruncated": {"truncated": {"test": "X" * self.processor._MAX_LOG_REQUEST_BYTE_SIZE}}, + } - nest_dict_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=True - ) 
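The size expectations in the rewritten tests above are easier to follow with concrete numbers; a small worked example mirroring the test's shape (constants chosen here for illustration):

log_key, log_depth, body_len = "test", 2, 400
# {"test": {"test": "X" * 400}} contributes one key per nesting level plus the body:
expected_dict_size = len(log_key) * log_depth + body_len  # 4 * 2 + 400 = 408
# [["X" * 400]] has no keys, so only the body is counted:
expected_array_size = body_len                            # 400
assert (expected_dict_size, expected_array_size) == (408, 400)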
- nest_array_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=False + expected_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE + ( + len("calculated") + len(calculated_body) + len("restOfThisLogWillBeTruncated") ) - # Only calculates log size of up to depth of 4 - dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=4) - array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=4) + test_logs = self.generate_test_log_data(log_body=log_body, count=1) - # Asserting almost equal to account for dictionary keys in the Log object body - self.assertAlmostEqual(dict_size, expected_size, delta=10) - self.assertAlmostEqual(array_size, expected_size, delta=10) + # Only calculates log size of up to depth of 0 + dict_size = self.processor._estimate_log_size(log=test_logs[0], depth=max_depth) + + self.assertEqual(dict_size, expected_size) def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): """Tests that the processor returns prematurely if the size already exceeds _MAX_LOG_REQUEST_BYTE_SIZE""" - # Should stop calculation at bigKey - message = { - "bigKey": "X" * (self.max_log_size), - "smallKey": "X" * (self.max_log_size * 10), + # Should stop calculation at bigKey + biggerKey and not calculate the content of biggerKey + log_body = { + "bigKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE), + "biggerKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE * 100), } - expected_size = self.base_log_size + self.max_log_size + len("bigKey") - - nest_dict_log = self.generate_test_log_data( - log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=True - ) - nest_array_log = self.generate_test_log_data( - log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=False + expected_size = ( + self.processor._BASE_LOG_BUFFER_BYTE_SIZE + + self.processor._MAX_LOG_REQUEST_BYTE_SIZE + + len("bigKey") + + len("biggerKey") ) + nest_dict_log = self.generate_test_log_data(log_body=log_body, count=1, create_map=True) + nest_array_log = self.generate_test_log_data(log_body=log_body, count=1, create_map=False) + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0]) array_size = self.processor._estimate_log_size(log=nest_array_log[0]) - self.assertAlmostEqual(dict_size, expected_size, delta=10) - self.assertAlmostEqual(array_size, expected_size, delta=10) + self.assertEqual(dict_size, expected_size) + self.assertEqual(array_size, expected_size) def test_process_log_data_primitive(self): @@ -100,18 +120,9 @@ def test_process_log_data_primitive(self): expected_sizes = [4, 4, 1, 3, 4, 5, 0] for index, primitive in enumerate(primitives): - log = self.generate_test_log_data( - log_body=primitive, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=1, - ) - - expected_size = self.base_log_size + expected_sizes[index] + log = self.generate_test_log_data(log_body=primitive, count=1) + expected_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE + expected_sizes[index] actual_size = self.processor._estimate_log_size(log[0]) - self.assertEqual(actual_size, expected_size) def test_process_log_data_with_cycle(self): @@ -119,15 +130,8 @@ def test_process_log_data_with_cycle(self): cyclic_dict: dict = {"data": "test"} cyclic_dict["self_ref"] = cyclic_dict - log = self.generate_test_log_data( - log_body=cyclic_dict, - attr_key="", - attr_val="", - log_body_depth=-1, - 
attr_depth=-1, - count=1, - ) - expected_size = self.base_log_size + len("data") + len("self_ref") + len("test") + log = self.generate_test_log_data(log_body=cyclic_dict, count=1) + expected_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE + len("data") + len("self_ref") + len("test") actual_size = self.processor._estimate_log_size(log[0]) self.assertEqual(actual_size, expected_size) @@ -141,9 +145,7 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): """Tests that export is only called once if a single batch is under the size limit""" log_count = 10 log_body = "test" - test_logs = self.generate_test_log_data( - log_body=log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=log_count - ) + test_logs = self.generate_test_log_data(log_body=log_body, count=log_count) total_data_size = 0 for log in test_logs: @@ -170,9 +172,7 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): """Should make multiple export calls of batch size 1 to export logs of size > 1 MB.""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) - test_logs = self.generate_test_log_data( - log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=15 - ) + test_logs = self.generate_test_log_data(log_body=large_log_body, count=15) for log in test_logs: self.processor._queue.appendleft(log) @@ -195,26 +195,13 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): """Should make calls to export smaller sub-batch logs""" - large_log_body = "X" * (self.max_log_size + 1) - small_log_body = "X" * (self.max_log_size // 10 - self.base_log_size) - - large_logs = self.generate_test_log_data( - log_body=large_log_body, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=3, + large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) + small_log_body = "X" * ( + self.processor._MAX_LOG_REQUEST_BYTE_SIZE // 10 - self.processor._BASE_LOG_BUFFER_BYTE_SIZE ) - small_logs = self.generate_test_log_data( - log_body=small_log_body, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=12, - ) + large_logs = self.generate_test_log_data(log_body=large_log_body, count=3) + small_logs = self.generate_test_log_data(log_body=small_log_body, count=12) # 1st, 2nd, 3rd batch = size 1 # 4th batch = size 10 @@ -247,20 +234,18 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): @staticmethod def generate_test_log_data( log_body, - attr_key, - attr_val, - log_body_depth=3, - attr_depth=3, + log_key="key", + log_body_depth=0, count=5, create_map=True, ) -> List[LogData]: def generate_nested_value(depth, value, create_map=True) -> AnyValue: - if depth < 0: + if depth <= 0: return value if create_map: - return {"t": generate_nested_value(depth - 1, value, True)} + return {log_key: generate_nested_value(depth - 1, value, True)} return [generate_nested_value(depth - 1, value, False)] @@ -269,13 +254,12 @@ def generate_nested_value(depth, value, create_map=True) -> AnyValue: for index in range(count): record = LogRecord( timestamp=int(time.time_ns()), - trace_id=int(f"0x{index + 1:032x}", 16), - span_id=int(f"0x{index + 1:016x}", 16), + trace_id=0x123456789ABCDEF0123456789ABCDEF0, + span_id=0x123456789ABCDEF0, trace_flags=TraceFlags(1), severity_text="INFO", 
severity_number=SeverityNumber.INFO, body=generate_nested_value(log_body_depth, log_body, create_map), - attributes={attr_key: generate_nested_value(attr_depth, attr_val, create_map)}, ) log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) From ddea4042eeb820b41ca6290382ff1245f6ac0c0f Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 18:44:39 +0000 Subject: [PATCH 21/52] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index cea4a5404..7d95c70ab 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging -from typing import List, Mapping, Optional, Sequence, cast +from typing import Mapping, Optional, Sequence, cast from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value @@ -129,7 +129,7 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: size: int = self._BASE_LOG_BUFFER_BYTE_SIZE while queue: - new_queue: List[tuple[AnyValue, int]] = [] + new_queue = [] for data in queue: # small optimization, can stop calculating the size once it reaches the 1 MB limit. From 87c08bca59be777360b7127149fed900703afb95 Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 18:50:27 +0000 Subject: [PATCH 22/52] linting fix --- .../otlp/aws/logs/test_aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index a6fc7baa3..3a7bb5ef9 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -251,7 +251,7 @@ def generate_nested_value(depth, value, create_map=True) -> AnyValue: logs = [] - for index in range(count): + for _ in range(count): record = LogRecord( timestamp=int(time.time_ns()), trace_id=0x123456789ABCDEF0123456789ABCDEF0, From e68c4fd1baebc6bbf48e9d5358ca8266df4f21f3 Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 18:54:32 +0000 Subject: [PATCH 23/52] linting fix --- .../exporter/otlp/aws/logs/aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 7d95c70ab..e68c61750 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -98,7 +98,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: _logger.exception("Exception while exporting logs: %s", exception) detach(token) - def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: + def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: disable=too-many-branches """ Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. From 93e28360ede50dd7d6b705aa23a5487fb5e61404 Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 19:20:21 +0000 Subject: [PATCH 24/52] add cycle detection --- .../distro/aws_opentelemetry_configurator.py | 2 +- .../logs/aws_batch_log_record_processor.py | 35 +++- .../test_aws_batch_log_record_processor.py | 178 +++++++++--------- 3 files changed, 117 insertions(+), 98 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index bebaf830d..a861ae3db 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -4,7 +4,7 @@ import logging import os import re -from logging import NOTSET, Logger, getLogger +from logging import Logger, getLogger from typing import ClassVar, Dict, List, NamedTuple, Optional, Type, Union from importlib_metadata import version diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 0568c9296..e68c61750 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging -from typing import List, Mapping, Optional, Sequence, cast +from typing import Mapping, Optional, Sequence, cast from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value @@ -98,11 +98,12 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None: _logger.exception("Exception while exporting logs: %s", exception) detach(token) - def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: + def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: disable=too-many-branches """ Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. Will process complex log structures up to the specified depth limit. + Includes cycle detection to prevent processing the log content more than once. If the depth limit of the log structure is exceeded, returns the truncated calculation to everything up to that point. 
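The cycle guard described in the docstring above can be demonstrated standalone; a toy sketch (recursive for brevity, unlike the processor's queue-based walk, and without byte-size accounting):

def count_strings(value, visited=None):
    """Toy size counter that stops on revisit instead of recursing forever."""
    visited = visited if visited is not None else set()
    if isinstance(value, str):
        return len(value)
    total = 0
    if isinstance(value, dict) and id(value) not in visited:
        visited.add(id(value))  # id() is unique while the object is alive
        for key, item in value.items():
            total += len(key) + count_strings(item, visited)
    return total

cyclic = {"data": "test"}
cyclic["self_ref"] = cyclic  # circular reference
assert count_strings(cyclic) == len("data") + len("test") + len("self_ref")  # 16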
@@ -114,14 +115,21 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: int: The estimated size of the log object in bytes """ - # Use a queue to prevent excessive recursive calls. - # We calculate based on the size of the log record body and attributes for the log. - queue: List[tuple[AnyValue, int]] = [(log.log_record.body, 0), (log.log_record.attributes, -1)] + # Queue contains tuples of (log_content, depth) where: + # - log_content is the current piece of log data being processed + # - depth tracks how many levels deep we've traversed to reach this content + # - body starts at depth 0 since it's an AnyValue object + # - Attributes start at depth -1 since it's a Mapping[str, AnyValue] - when traversed, we will + # start processing its keys at depth 0 + queue = [(log.log_record.body, 0), (log.log_record.attributes, -1)] + + # Track visited complex log contents to avoid calculating the same one more than once + visited = set() size: int = self._BASE_LOG_BUFFER_BYTE_SIZE while queue: - new_queue: List[tuple[AnyValue, int]] = [] + new_queue = [] for data in queue: # small optimization, can stop calculating the size once it reaches the 1 MB limit. @@ -130,19 +138,30 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: next_val, current_depth = data - if isinstance(next_val, (str, bytes)): - size += len(next_val) + if next_val is None: continue if isinstance(next_val, bool): size += 4 if next_val else 5 continue + if isinstance(next_val, (str, bytes)): + size += len(next_val) + continue + if isinstance(next_val, (float, int)): size += len(str(next_val)) continue + # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"], if current_depth <= depth: + obj_id = id( + next_val + ) # Guaranteed to be unique, see: https://www.w3schools.com/python/ref_func_id.asp + if obj_id in visited: + continue + visited.add(obj_id) + if isinstance(next_val, Sequence): for content in next_val: new_queue.append((cast(AnyValue, content), current_depth + 1)) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 62673c566..3a7bb5ef9 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -24,75 +24,95 @@ def setUp(self): self.mock_exporter.export.return_value = LogExportResult.SUCCESS self.processor = AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=self.mock_exporter) - self.max_log_size = self.processor._MAX_LOG_REQUEST_BYTE_SIZE - self.base_log_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE def test_process_log_data_nested_structure(self): """Tests that the processor correctly handles nested structures (dict/list)""" - message_size = 400 - message = "X" * message_size + log_body = "X" * 400 + log_key = "test" + log_depth = 2 - nest_dict_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, create_map=True + nested_dict_log = self.generate_test_log_data( + log_body=log_body, log_key=log_key, log_body_depth=log_depth, count=1, create_map=True ) - nest_array_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=2, attr_depth=2, count=1, 
create_map=False + nested_array_log = self.generate_test_log_data( + log_body=log_body, log_key=log_key, log_body_depth=log_depth, count=1, create_map=False ) - expected_size = self.base_log_size + message_size * 2 + expected_dict_size = len(log_key) * log_depth + len(log_body) + expected_array_size = len(log_body) + + dict_size = self.processor._estimate_log_size(log=nested_dict_log[0], depth=log_depth) + array_size = self.processor._estimate_log_size(log=nested_array_log[0], depth=log_depth) + + self.assertEqual(dict_size - self.processor._BASE_LOG_BUFFER_BYTE_SIZE, expected_dict_size) + self.assertEqual(array_size - self.processor._BASE_LOG_BUFFER_BYTE_SIZE, expected_array_size) + + def test_process_log_data_with_attributes(self): + """Tests that the processor correctly handles both body and attributes""" + log_body = "test_body" + attr_key = "attr_key" + attr_value = "attr_value" + + record = LogRecord( + timestamp=int(time.time_ns()), + trace_id=0x123456789ABCDEF0123456789ABCDEF0, + span_id=0x123456789ABCDEF0, + trace_flags=TraceFlags(1), + severity_text="INFO", + severity_number=SeverityNumber.INFO, + body=log_body, + attributes={attr_key: attr_value}, + ) + log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) - dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=2) - array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=2) + expected_size = len(log_body) + len(attr_key) + len(attr_value) + actual_size = self.processor._estimate_log_size(log_data) - # Asserting almost equal to account for dictionary keys in the Log object - self.assertAlmostEqual(dict_size, expected_size, delta=10) - self.assertAlmostEqual(array_size, expected_size, delta=10) + self.assertEqual(actual_size - self.processor._BASE_LOG_BUFFER_BYTE_SIZE, expected_size) def test_process_log_data_nested_structure_exceeds_depth(self): """Tests that the processor cuts off calculation for nested structure that exceeds the depth limit""" - calculated = "X" * 400 - message = {"calculated": calculated, "truncated": {"truncated": {"test": "X" * self.max_log_size}}} - - # *2 since we set this message in both body and attributes - expected_size = self.base_log_size + (len("calculated") + len(calculated) + len("truncated")) * 2 + max_depth = 0 + calculated_body = "X" * 400 + log_body = { + "calculated": "X" * 400, + "restOfThisLogWillBeTruncated": {"truncated": {"test": "X" * self.processor._MAX_LOG_REQUEST_BYTE_SIZE}}, + } - nest_dict_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=True - ) - nest_array_log = self.generate_test_log_data( - log_body=message, attr_key="t", attr_val=message, log_body_depth=3, attr_depth=3, count=1, create_map=False + expected_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE + ( + len("calculated") + len(calculated_body) + len("restOfThisLogWillBeTruncated") ) - # Only calculates log size of up to depth of 4 - dict_size = self.processor._estimate_log_size(log=nest_dict_log[0], depth=4) - array_size = self.processor._estimate_log_size(log=nest_array_log[0], depth=4) + test_logs = self.generate_test_log_data(log_body=log_body, count=1) + + # Only calculates log size of up to depth of 0 + dict_size = self.processor._estimate_log_size(log=test_logs[0], depth=max_depth) - # Asserting almost equal to account for dictionary keys in the Log object body - self.assertAlmostEqual(dict_size, expected_size, delta=10) - 
self.assertAlmostEqual(array_size, expected_size, delta=10) + self.assertEqual(dict_size, expected_size) def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): """Tests that the processor returns prematurely if the size already exceeds _MAX_LOG_REQUEST_BYTE_SIZE""" - # Should stop calculation at bigKey - message = { - "bigKey": "X" * (self.max_log_size), - "smallKey": "X" * (self.max_log_size * 10), + # Should stop calculation at bigKey + biggerKey and not calculate the content of biggerKey + log_body = { + "bigKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE), + "biggerKey": "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE * 100), } - expected_size = self.base_log_size + self.max_log_size + len("bigKey") - - nest_dict_log = self.generate_test_log_data( - log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=True - ) - nest_array_log = self.generate_test_log_data( - log_body=message, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=1, create_map=False + expected_size = ( + self.processor._BASE_LOG_BUFFER_BYTE_SIZE + + self.processor._MAX_LOG_REQUEST_BYTE_SIZE + + len("bigKey") + + len("biggerKey") ) + nest_dict_log = self.generate_test_log_data(log_body=log_body, count=1, create_map=True) + nest_array_log = self.generate_test_log_data(log_body=log_body, count=1, create_map=False) + dict_size = self.processor._estimate_log_size(log=nest_dict_log[0]) array_size = self.processor._estimate_log_size(log=nest_array_log[0]) - self.assertAlmostEqual(dict_size, expected_size, delta=10) - self.assertAlmostEqual(array_size, expected_size, delta=10) + self.assertEqual(dict_size, expected_size) + self.assertEqual(array_size, expected_size) def test_process_log_data_primitive(self): @@ -100,20 +120,21 @@ def test_process_log_data_primitive(self): expected_sizes = [4, 4, 1, 3, 4, 5, 0] for index, primitive in enumerate(primitives): - log = self.generate_test_log_data( - log_body=primitive, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=1, - ) - - expected_size = self.base_log_size + expected_sizes[index] + log = self.generate_test_log_data(log_body=primitive, count=1) + expected_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE + expected_sizes[index] actual_size = self.processor._estimate_log_size(log[0]) - self.assertEqual(actual_size, expected_size) + def test_process_log_data_with_cycle(self): + """Test that processor handles processing logs with circular references only once""" + cyclic_dict: dict = {"data": "test"} + cyclic_dict["self_ref"] = cyclic_dict + + log = self.generate_test_log_data(log_body=cyclic_dict, count=1) + expected_size = self.processor._BASE_LOG_BUFFER_BYTE_SIZE + len("data") + len("self_ref") + len("test") + actual_size = self.processor._estimate_log_size(log[0]) + self.assertEqual(actual_size, expected_size) + @patch( "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", return_value=MagicMock(), @@ -124,9 +145,7 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): """Tests that export is only called once if a single batch is under the size limit""" log_count = 10 log_body = "test" - test_logs = self.generate_test_log_data( - log_body=log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=log_count - ) + test_logs = self.generate_test_log_data(log_body=log_body, count=log_count) total_data_size = 0 for log in test_logs: @@ -153,9 +172,7 @@ def 
test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): """Should make multiple export calls of batch size 1 to export logs of size > 1 MB.""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) - test_logs = self.generate_test_log_data( - log_body=large_log_body, attr_key="", attr_val="", log_body_depth=-1, attr_depth=-1, count=15 - ) + test_logs = self.generate_test_log_data(log_body=large_log_body, count=15) for log in test_logs: self.processor._queue.appendleft(log) @@ -178,26 +195,13 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): """Should make calls to export smaller sub-batch logs""" - large_log_body = "X" * (self.max_log_size + 1) - small_log_body = "X" * (self.max_log_size // 10 - self.base_log_size) - - large_logs = self.generate_test_log_data( - log_body=large_log_body, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=3, + large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) + small_log_body = "X" * ( + self.processor._MAX_LOG_REQUEST_BYTE_SIZE // 10 - self.processor._BASE_LOG_BUFFER_BYTE_SIZE ) - small_logs = self.generate_test_log_data( - log_body=small_log_body, - attr_key="", - attr_val="", - log_body_depth=-1, - attr_depth=-1, - count=12, - ) + large_logs = self.generate_test_log_data(log_body=large_log_body, count=3) + small_logs = self.generate_test_log_data(log_body=small_log_body, count=12) # 1st, 2nd, 3rd batch = size 1 # 4th batch = size 10 @@ -230,36 +234,32 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): @staticmethod def generate_test_log_data( log_body, - attr_key, - attr_val, - log_body_depth=3, - attr_depth=3, + log_key="key", + log_body_depth=0, count=5, create_map=True, - instrumentation_scope=InstrumentationScope("test-scope", "1.0.0"), ) -> List[LogData]: def generate_nested_value(depth, value, create_map=True) -> AnyValue: - if depth < 0: + if depth <= 0: return value if create_map: - return {"t": generate_nested_value(depth - 1, value, True)} + return {log_key: generate_nested_value(depth - 1, value, True)} return [generate_nested_value(depth - 1, value, False)] logs = [] - for index in range(count): + for _ in range(count): record = LogRecord( timestamp=int(time.time_ns()), - trace_id=int(f"0x{index + 1:032x}", 16), - span_id=int(f"0x{index + 1:016x}", 16), + trace_id=0x123456789ABCDEF0123456789ABCDEF0, + span_id=0x123456789ABCDEF0, trace_flags=TraceFlags(1), severity_text="INFO", severity_number=SeverityNumber.INFO, body=generate_nested_value(log_body_depth, log_body, create_map), - attributes={attr_key: generate_nested_value(attr_depth, attr_val, create_map)}, ) log_data = LogData(log_record=record, instrumentation_scope=InstrumentationScope("test-scope", "1.0.0")) From e1ff7b23f1994587f352d66336b36d286f346e7f Mon Sep 17 00:00:00 2001 From: liustve Date: Sat, 28 Jun 2025 19:25:57 +0000 Subject: [PATCH 25/52] log processor race condition fix --- .../logs/aws_batch_log_record_processor.py | 9 ++++ .../test_aws_batch_log_record_processor.py | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 
e68c61750..b4e3ea38b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -178,3 +178,12 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di queue = new_queue return size + + # Only export the logs once to avoid the race condition of the worker thread and force flush thread + # https://github.com/open-telemetry/opentelemetry-python/issues/3193 + # https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L199 + def force_flush(self, timeout_millis: Optional[int] = None) -> bool: + if self._shutdown: + return False + self._export(BatchLogExportStrategy.EXPORT_AT_LEAST_ONE_BATCH) + return True diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 3a7bb5ef9..d340c1c21 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -231,6 +231,48 @@ def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): expected_size = expected_sizes[index] self.assertEqual(len(batch), expected_size) + def test_force_flush_returns_false_when_shutdown(self): + """Tests that force_flush returns False when processor is shutdown""" + self.processor.shutdown() + result = self.processor.force_flush() + + # Verify force_flush returns False and no export is called + self.assertFalse(result) + self.mock_exporter.export.assert_not_called() + + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + return_value=MagicMock(), + ) + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + def test_force_flush_exports_only_one_batch(self, _, __, ___): + """Tests that force_flush should try to at least export one batch of logs. 
Remaining logs stay in the queue.""" + # Set max_export_batch_size to 5 to limit batch size + self.processor._max_export_batch_size = 5 + self.processor._shutdown = False + + # Add 6 logs to the queue; after the export there should be 1 log remaining + log_count = 6 + test_logs = self.generate_test_log_data(log_body="test message", count=log_count) + + for log in test_logs: + self.processor._queue.appendleft(log) + + self.assertEqual(len(self.processor._queue), log_count) + + result = self.processor.force_flush() + + self.assertTrue(result) + # 1 log should remain + self.assertEqual(len(self.processor._queue), 1) + self.mock_exporter.export.assert_called_once() + + # Verify only one batch of 5 logs was exported + args, _ = self.mock_exporter.export.call_args + exported_batch = args[0] + self.assertEqual(len(exported_batch), 5) + @staticmethod def generate_test_log_data( log_body, From cb21d39d63286130faaddfcb4f6c8e98b49c4e0e Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 30 Jun 2025 17:40:44 +0000 Subject: [PATCH 26/52] add comment about termination of loop --- .../exporter/otlp/aws/logs/otlp_aws_logs_exporter.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 845a80ecb..f5aa32460 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -61,8 +61,7 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: Key behaviors: 1. Always compresses data with gzip before sending - 2. Adds truncatable fields header for large Gen AI logs (>1MB) - 3. Implements Retry-After header support for throttling responses + 2. 
Implements Retry-After header support for throttling responses """ if self._shutdown: @@ -77,6 +76,11 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) + # This loop will eventually terminate because: + # 1) The export request will eventually either succeed or fail permanently + # 2) The exponential backoff generator has a max value of _MAX_RETRY_TIMEOUT (64s) + # 3) After enough retries, delay will equal _MAX_RETRY_TIMEOUT, forcing exit + # 4) Non-retryable errors (4xx except 429) immediately exit the loop while True: resp = self._send(data) From 5260e901cae2af917c31df4e78f8da5a77d4d6f3 Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 30 Jun 2025 21:35:18 +0000 Subject: [PATCH 27/52] consolidate botocore sessions into utils --- .../src/amazon/opentelemetry/distro/_utils.py | 41 ++++----- .../distro/aws_opentelemetry_configurator.py | 84 ++++++++++++------ .../distro/aws_opentelemetry_distro.py | 3 +- .../aws/metrics/_cloudwatch_log_client.py | 5 +- .../otlp/aws/common/aws_auth_session.py | 72 ++++++--------- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 17 +++- .../otlp/aws/traces/otlp_aws_span_exporter.py | 15 ++-- .../test_aws_cloudwatch_emf_exporter.py | 9 +- .../aws/metrics/test_cloudwatch_log_client.py | 5 +- .../otlp/aws/common/test_aws_auth_session.py | 17 ---- .../aws/logs/test_otlp_aws_logs_exporter.py | 2 +- .../aws/traces/test_otlp_aws_span_exporter.py | 18 ++-- .../test_aws_opentelementry_configurator.py | 38 ++++---- .../amazon/opentelemetry/distro/test_utils.py | 87 ++++++++------------ 14 files changed, 202 insertions(+), 211 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index be241885d..deb652ca3 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -4,6 +4,7 @@ import os from importlib.metadata import PackageNotFoundError, version from logging import Logger, getLogger +from typing import Optional from packaging.requirements import Requirement @@ -37,30 +38,18 @@ def is_agent_observability_enabled() -> bool: return os.environ.get(AGENT_OBSERVABILITY_ENABLED, "false").lower() == "true" -def get_aws_region() -> str: - """Get AWS region using botocore session. - - botocore automatically checks in the following priority order: - 1. AWS_REGION environment variable - 2. AWS_DEFAULT_REGION environment variable - 3. AWS CLI config file (~/.aws/config) - 4. EC2 instance metadata service - - Returns: - The AWS region if found, None otherwise. - """ - if is_installed("botocore"): - try: - from botocore import session # pylint: disable=import-outside-toplevel - - botocore_session = session.Session() - if botocore_session.region_name: - return botocore_session.region_name - except (ImportError, AttributeError): - # botocore failed to determine region - pass - - _logger.warning( - "AWS region not found. Please set AWS_REGION environment variable or configure AWS CLI with 'aws configure'." 
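The delay-selection rule behind the termination comments above can be sketched in a few lines (illustrative only; in the exporter the values come from _parse_retryable_header and the exponential backoff generator):

def pick_delay(retry_after_sec, backoff_iter, cap=64):
    # Prefer a valid Retry-After value from the server; otherwise fall back
    # to exponential backoff. Returning the cap signals "retries exhausted".
    if retry_after_sec is not None and retry_after_sec >= 0:
        return retry_after_sec
    return next(backoff_iter, cap)

backoff = iter([1, 2, 4, 8, 16, 32])
assert pick_delay(None, backoff) == 1            # no header: use backoff
assert pick_delay(10, backoff) == 10             # throttled: honor Retry-After
assert pick_delay(None, iter([]), cap=64) == 64  # exhausted: cap forces loop exit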
- ) +IS_BOTOCORE_INSTALLED: bool = is_installed("botocore") + + +def get_aws_session(): + if IS_BOTOCORE_INSTALLED: + # pylint: disable=import-outside-toplevel + from botocore.session import Session + + return Session() return None + + +def get_aws_region() -> Optional[str]: + botocore_session = get_aws_session() + return botocore_session.get_config_variable("region") if botocore_session else None diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index a861ae3db..56f94649c 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -12,7 +12,7 @@ from amazon.opentelemetry.distro._aws_attribute_keys import AWS_LOCAL_SERVICE from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute -from amazon.opentelemetry.distro._utils import is_agent_observability_enabled, is_installed +from amazon.opentelemetry.distro._utils import IS_BOTOCORE_INSTALLED, get_aws_session, is_agent_observability_enabled from amazon.opentelemetry.distro.always_record_sampler import AlwaysRecordSampler from amazon.opentelemetry.distro.attribute_propagating_span_processor_builder import ( AttributePropagatingSpanProcessorBuilder, @@ -23,11 +23,6 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( - AwsCloudWatchOtlpBatchLogRecordProcessor, -) -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter -from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler from amazon.opentelemetry.distro.scope_based_exporter import ScopeBasedPeriodicExportingMetricReader @@ -214,8 +209,7 @@ def _init_logging( for _, exporter_class in exporters.items(): exporter_args = {} log_exporter: LogExporter = _customize_logs_exporter(exporter_class(**exporter_args)) - log_processor = _customize_log_record_processor(log_exporter) - provider.add_log_record_processor(log_processor) + _customize_log_record_processor(provider, log_exporter) event_logger_provider = EventLoggerProvider(logger_provider=provider) set_event_logger_provider(event_logger_provider) @@ -303,7 +297,7 @@ def _export_unsampled_span_for_agent_observability(trace_provider: TracerProvide traces_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_TRACES_ENDPOINT) - span_exporter = OTLPAwsSpanExporter(endpoint=traces_endpoint, logger_provider=get_logger_provider()) + span_exporter = _create_aws_exporter(endpoint=traces_endpoint) trace_provider.add_span_processor(BatchUnsampledSpanProcessor(span_exporter=span_exporter)) @@ -404,15 +398,7 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> _logger.info("Detected using AWS OTLP Traces Endpoint.") if isinstance(span_exporter, OTLPSpanExporter): - if is_agent_observability_enabled(): - # Span exporter needs an instance of logger provider in ai agent - # observability case because we need to split input/output prompts - # from span 
attributes and send them to the logs pipeline per - # the new Gen AI semantic convention from OTel - # ref: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/ - span_exporter = OTLPAwsSpanExporter(endpoint=traces_endpoint, logger_provider=get_logger_provider()) - else: - span_exporter = OTLPAwsSpanExporter(endpoint=traces_endpoint) + return _create_aws_exporter(endpoint=traces_endpoint) else: _logger.warning( @@ -426,14 +412,20 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> return AwsMetricAttributesSpanExporterBuilder(span_exporter, resource).build() -def _customize_log_record_processor(log_exporter: LogExporter): - if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): - return AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=log_exporter) +def _customize_log_record_processor(provider: LoggerProvider, log_exporter: Optional[LogExporter]) -> None: + if log_exporter is None: + return + if is_agent_observability_enabled() and IS_BOTOCORE_INSTALLED: + from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsCloudWatchOtlpBatchLogRecordProcessor, + ) - return BatchLogRecordProcessor(exporter=log_exporter) + provider.add_log_record_processor(AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=log_exporter)) + else: + provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter)) -def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: +def _customize_logs_exporter(log_exporter: LogExporter) -> Optional[LogExporter]: logs_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_LOGS_ENDPOINT) if _is_aws_otlp_endpoint(logs_endpoint, "logs"): @@ -443,7 +435,7 @@ def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: # Setting default compression mode to Gzip as this is the behavior in upstream's # collector otlp http exporter: # https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter - return OTLPAwsLogExporter(endpoint=logs_endpoint) + return _create_aws_exporter(endpoint=logs_endpoint) _logger.warning( "Improper configuration see: please export/set " @@ -762,8 +754,9 @@ def _check_emf_exporter_enabled() -> bool: def create_emf_exporter(): """Create and configure the CloudWatch EMF exporter.""" try: + session = get_aws_session() # Check if botocore is available before importing the EMF exporter - if not is_installed("botocore"): + if not session: _logger.warning("botocore is not installed. 
EMF exporter requires botocore") return None @@ -778,6 +771,7 @@ def create_emf_exporter(): return None return AwsCloudWatchEmfExporter( + session=session, namespace=log_header_setting.namespace, log_group_name=log_header_setting.log_group, log_stream_name=log_header_setting.log_stream, @@ -786,3 +780,43 @@ def create_emf_exporter(): except Exception as errors: _logger.error("Failed to create EMF exporter: %s", errors) return None + + +def _create_aws_exporter(endpoint: str): + """Create and configure the AWS OTLP exporters.""" + try: + session = get_aws_session() + # Check if botocore is available before importing the AWS exporter + if not session: + _logger.warning("SigV4 Auth requires botocore to be enabled") + return None + + # pylint: disable=import-outside-toplevel + from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter + from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter + + endpoint = endpoint.lower() + split = endpoint.split(".") + service = split[0] + region = split[1] + + if "xray" in service: + if is_agent_observability_enabled(): + # Span exporter needs an instance of logger provider in ai agent + # observability case because we need to split input/output prompts + # from span attributes and send them to the logs pipeline per + # the new Gen AI semantic convention from OTel + # ref: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/ + return OTLPAwsSpanExporter( + session=session, endpoint=endpoint, aws_region=region, logger_provider=get_logger_provider() + ) + + return OTLPAwsSpanExporter(session=session, endpoint=endpoint, aws_region=region) + + if "logs" in service: + return OTLPAwsLogExporter(session=session, aws_region=region) + + # pylint: disable=broad-exception-caught + except Exception as errors: + _logger.error("Failed to create AWS OTLP exporter: %s", errors) + return None diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py index cf8109780..104707bbe 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_distro.py @@ -88,8 +88,9 @@ def _configure(self, **kwargs): # Set GenAI capture content default os.environ.setdefault(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "true") - # Set OTLP endpoints with AWS region if not already set region = get_aws_region() + + # Set OTLP endpoints with AWS region if not already set if region: os.environ.setdefault( OTEL_EXPORTER_OTLP_TRACES_ENDPOINT, f"https://xray.{region}.amazonaws.com/v1/traces" diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py index 72236121f..a62b34c43 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py @@ -8,8 +8,8 @@ import uuid from typing import Any, Dict, List, Optional -import botocore.session from botocore.exceptions import ClientError +from botocore.session import Session logger = logging.getLogger(__name__) @@ -90,6 +90,7 @@ class CloudWatchLogClient: def __init__( self, 
log_group_name: str, + session: Session = Session(), log_stream_name: Optional[str] = None, aws_region: Optional[str] = None, **kwargs, @@ -105,8 +106,6 @@ def __init__( """ self.log_group_name = log_group_name self.log_stream_name = log_stream_name or self._generate_log_stream_name() - - session = botocore.session.Session() self.logs_client = session.create_client("logs", region_name=aws_region, **kwargs) # Event batch to store logs before sending to CloudWatch diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py index 2c383592b..c945727c5 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py @@ -4,8 +4,9 @@ import logging import requests - -from amazon.opentelemetry.distro._utils import is_installed +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.session import Session _logger = logging.getLogger(__name__) @@ -33,57 +34,36 @@ class AwsAuthSession(requests.Session): service (str): The AWS service name for signing (e.g., "logs" or "xray") """ - def __init__(self, aws_region, service): - - self._has_required_dependencies = False - - # Requires botocore to be installed to sign the headers. However, - # some users might not need to use this authenticator. In order not conflict - # with existing behavior, we check for botocore before initializing this exporter. - - if aws_region and service and is_installed("botocore"): - # pylint: disable=import-outside-toplevel - from botocore import auth, awsrequest, session - - self._boto_auth = auth - self._boto_aws_request = awsrequest - self._boto_session = session.Session() - - self._aws_region = aws_region - self._service = service - self._has_required_dependencies = True - - else: - _logger.error( - "botocore is required to enable SigV4 Authentication. 
Please install it using `pip install botocore`",
-            )
+    def __init__(self, aws_region: str, service: str, session: Session = Session()):
+        self._aws_region: str = aws_region
+        self._service: str = service
+        self._session: Session = session
 
         super().__init__()
 
     def request(self, method, url, *args, data=None, headers=None, **kwargs):
-        if self._has_required_dependencies:
-
-            credentials = self._boto_session.get_credentials()
-
-            if credentials is not None:
-                signer = self._boto_auth.SigV4Auth(credentials, self._service, self._aws_region)
-
-                request = self._boto_aws_request.AWSRequest(
-                    method="POST",
-                    url=url,
-                    data=data,
-                    headers={"Content-Type": "application/x-protobuf"},
-                )
+        credentials = self._session.get_credentials()
+
+        if credentials:
+            signer = SigV4Auth(credentials, self._service, self._aws_region)
+            request = AWSRequest(
+                method="POST",
+                url=url,
+                data=data,
+                headers={"Content-Type": "application/x-protobuf"},
+            )
 
-            try:
-                signer.add_auth(request)
+            try:
+                signer.add_auth(request)
 
-                if headers is None:
-                    headers = {}
+                if headers is None:
+                    headers = {}
 
-                headers.update(dict(request.headers))
+                headers.update(dict(request.headers))
 
-            except Exception as signing_error:  # pylint: disable=broad-except
-                _logger.error("Failed to sign request: %s", signing_error)
+            except Exception as signing_error:  # pylint: disable=broad-except
+                _logger.error("Failed to sign request: %s", signing_error)
+        else:
+            _logger.error("Failed to load AWS Credentials")
 
         return super().request(method=method, url=url, *args, data=data, headers=headers, **kwargs)
 
diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py
index 845a80ecb..4b00dc3b0 100644
--- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py
+++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py
@@ -7,6 +7,7 @@
 from time import sleep
 from typing import Dict, Optional, Sequence
 
+from botocore.session import Session
 from requests import Response
 from requests.exceptions import ConnectionError as RequestsConnectionError
 from requests.structures import CaseInsensitiveDict
@@ -28,6 +29,10 @@ class OTLPAwsLogExporter(OTLPLogExporter):
 
     def __init__(
         self,
+        aws_region: str,
+        session: Session = Session(),
+        log_group: Optional[str] = None,
+        log_stream: Optional[str] = None,
         endpoint: Optional[str] = None,
         certificate_file: Optional[str] = None,
         client_key_file: Optional[str] = None,
@@ -35,10 +40,14 @@ def __init__(
         headers: Optional[Dict[str, str]] = None,
         timeout: Optional[int] = None,
     ):
-        self._aws_region = None
+        self._aws_region = aws_region
 
-        if endpoint:
-            self._aws_region = endpoint.split(".")[1]
+        if log_group and log_stream:
+            log_headers = {"x-aws-log-group": log_group, "x-aws-log-stream": log_stream}
+            if headers:
+                headers.update(log_headers)
+            else:
+                headers = log_headers
 
         OTLPLogExporter.__init__(
             self,
@@ -49,7 +58,7 @@ def __init__(
             headers,
             timeout,
             compression=Compression.Gzip,
-            session=AwsAuthSession(aws_region=self._aws_region, service="logs"),
+            session=AwsAuthSession(session=session, aws_region=self._aws_region, service="logs"),
         )
 
     def export(self, batch: Sequence[LogData]) -> LogExportResult:
diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py index 7c608e885..0e5169525 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py @@ -4,6 +4,8 @@ import logging from typing import Dict, Optional, Sequence +from botocore.session import Session + from amazon.opentelemetry.distro._utils import is_agent_observability_enabled from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession from amazon.opentelemetry.distro.llo_handler import LLOHandler @@ -14,12 +16,14 @@ from opentelemetry.sdk.trace import ReadableSpan from opentelemetry.sdk.trace.export import SpanExportResult -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) class OTLPAwsSpanExporter(OTLPSpanExporter): def __init__( self, + aws_region: str, + session: Session = Session(), endpoint: Optional[str] = None, certificate_file: Optional[str] = None, client_key_file: Optional[str] = None, @@ -29,13 +33,10 @@ def __init__( compression: Optional[Compression] = None, logger_provider: Optional[LoggerProvider] = None, ): - self._aws_region = None + self._aws_region = aws_region self._logger_provider = logger_provider self._llo_handler = None - if endpoint: - self._aws_region = endpoint.split(".")[1] - OTLPSpanExporter.__init__( self, endpoint, @@ -45,7 +46,7 @@ def __init__( headers, timeout, compression, - session=AwsAuthSession(aws_region=self._aws_region, service="xray"), + session=AwsAuthSession(session=session, aws_region=self._aws_region, service="xray"), ) def _ensure_llo_handler(self): @@ -55,7 +56,7 @@ def _ensure_llo_handler(self): try: self._logger_provider = get_logger_provider() except Exception as exc: # pylint: disable=broad-exception-caught - logger.debug("Failed to get logger provider: %s", exc) + _logger.debug("Failed to get logger provider: %s", exc) return False if self._logger_provider: diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_aws_cloudwatch_emf_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_aws_cloudwatch_emf_exporter.py index 7ac67fddf..9a90c56a5 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_aws_cloudwatch_emf_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_aws_cloudwatch_emf_exporter.py @@ -132,7 +132,9 @@ def setUp(self): mock_session.return_value = mock_session_instance mock_session_instance.create_client.return_value = mock_client - self.exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + self.exporter = AwsCloudWatchEmfExporter( + session=mock_session, namespace="TestNamespace", log_group_name="test-log-group" + ) def test_initialization(self): """Test exporter initialization.""" @@ -150,6 +152,7 @@ def test_initialization_with_custom_params(self, mock_session): mock_session_instance.create_client.return_value = mock_client exporter = AwsCloudWatchEmfExporter( + session=mock_session_instance, namespace="CustomNamespace", log_group_name="custom-log-group", log_stream_name="custom-stream", @@ -578,7 +581,9 @@ def test_initialization_with_env_region(self, mock_session, mock_env_get): mock_session.return_value = mock_session_instance 
mock_session_instance.create_client.return_value = mock_client - exporter = AwsCloudWatchEmfExporter(namespace="TestNamespace", log_group_name="test-log-group") + exporter = AwsCloudWatchEmfExporter( + session=mock_session, namespace="TestNamespace", log_group_name="test-log-group" + ) # Just verify the exporter was created successfully with region handling self.assertIsNotNone(exporter) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_cloudwatch_log_client.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_cloudwatch_log_client.py index 0215962db..2793aeb34 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_cloudwatch_log_client.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/aws/metrics/test_cloudwatch_log_client.py @@ -24,7 +24,7 @@ def setUp(self): mock_session.return_value = mock_session_instance mock_session_instance.create_client.return_value = mock_client - self.log_client = CloudWatchLogClient(log_group_name="test-log-group") + self.log_client = CloudWatchLogClient(session=mock_session, log_group_name="test-log-group") def test_initialization(self): """Test log client initialization.""" @@ -42,6 +42,7 @@ def test_initialization_with_custom_params(self, mock_session): mock_session_instance.create_client.return_value = mock_client log_client = CloudWatchLogClient( + session=mock_session, log_group_name="custom-log-group", log_stream_name="custom-stream", aws_region="us-west-2", @@ -479,7 +480,7 @@ def test_initialization_with_custom_log_stream_name(self, mock_session): mock_session.return_value.create_client.return_value = mock_client custom_stream = "my-custom-stream" - client = CloudWatchLogClient("test-group", log_stream_name=custom_stream) + client = CloudWatchLogClient(session=mock_session, log_group_name="test-group", log_stream_name=custom_stream) self.assertEqual(client.log_stream_name, custom_stream) def test_send_log_batch_empty_batch_no_aws_call(self): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index 85a6c8958..c40a3ebeb 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -1,6 +1,5 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 -from importlib.metadata import PackageNotFoundError from unittest import TestCase from unittest.mock import patch @@ -20,22 +19,6 @@ class TestAwsAuthSession(TestCase): - @patch("amazon.opentelemetry.distro._utils.version") - @patch.dict("sys.modules", {"botocore": None}) - @patch("requests.Session.request", return_value=requests.Response()) - def test_aws_auth_session_no_botocore(self, mock_request, mock_version): - """Tests that aws_auth_session will not inject SigV4 Headers if botocore is not installed.""" - mock_version.side_effect = PackageNotFoundError("botocore") - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) - self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) - @patch("requests.Session.request", return_value=requests.Response()) @patch("botocore.session.Session.get_credentials", return_value=None) def test_aws_auth_session_no_credentials(self, _, __): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 31e401643..297642a28 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -36,7 +36,7 @@ class TestOTLPAwsLogsExporter(TestCase): def setUp(self): self.logs = self.generate_test_log_data() - self.exporter = OTLPAwsLogExporter(endpoint=self._ENDPOINT) + self.exporter = OTLPAwsLogExporter(aws_region="us-east-1", endpoint=self._ENDPOINT) @patch("requests.Session.post", return_value=good_response) def test_export_success(self, mock_request): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py index d0b2a004d..63bd1c227 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py @@ -17,7 +17,7 @@ def test_init_with_logger_provider(self): mock_logger_provider = MagicMock(spec=LoggerProvider) endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint, logger_provider=mock_logger_provider) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint, logger_provider=mock_logger_provider) self.assertEqual(exporter._logger_provider, mock_logger_provider) self.assertEqual(exporter._aws_region, "us-east-1") @@ -26,7 +26,7 @@ def test_init_without_logger_provider(self): # Test initialization without logger_provider (default behavior) endpoint = "https://xray.us-west-2.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-west-2", endpoint=endpoint) self.assertIsNone(exporter._logger_provider) self.assertEqual(exporter._aws_region, "us-west-2") @@ -38,7 +38,7 @@ def test_ensure_llo_handler_when_disabled(self, 
mock_is_enabled): mock_is_enabled.return_value = False endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) result = exporter._ensure_llo_handler() self.assertFalse(result) @@ -59,7 +59,7 @@ def test_ensure_llo_handler_lazy_initialization( mock_llo_handler_class.return_value = mock_llo_handler endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) # First call should initialize result = exporter._ensure_llo_handler() @@ -87,7 +87,7 @@ def test_ensure_llo_handler_with_existing_logger_provider(self, mock_is_enabled, mock_logger_provider = MagicMock(spec=LoggerProvider) endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint, logger_provider=mock_logger_provider) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint, logger_provider=mock_logger_provider) with patch( "amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter.LLOHandler" @@ -110,7 +110,7 @@ def test_ensure_llo_handler_get_logger_provider_fails(self, mock_is_enabled, moc mock_get_logger_provider.side_effect = Exception("Failed to get logger provider") endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) result = exporter._ensure_llo_handler() @@ -123,7 +123,7 @@ def test_export_with_llo_disabled(self, mock_is_enabled): mock_is_enabled.return_value = False endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) # Mock the parent class export method with patch.object(OTLPSpanExporter, "export") as mock_parent_export: @@ -149,7 +149,7 @@ def test_export_with_llo_enabled(self, mock_llo_handler_class, mock_get_logger_p mock_llo_handler_class.return_value = mock_llo_handler endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) # Mock spans and processed spans original_spans = [MagicMock(spec=ReadableSpan), MagicMock(spec=ReadableSpan)] @@ -182,7 +182,7 @@ def test_export_with_llo_processing_failure( mock_llo_handler.process_spans.side_effect = Exception("LLO processing failed") endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(endpoint=endpoint) + exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) spans = [MagicMock(spec=ReadableSpan), MagicMock(spec=ReadableSpan)] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index f9afa96a9..6e5b7f7f4 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -821,14 +821,15 @@ def test_export_unsampled_span_for_agent_observability_uses_aws_exporter(self): mock_tracer_provider: TracerProvider = MagicMock() with patch( - 
"amazon.opentelemetry.distro.aws_opentelemetry_configurator.OTLPAwsSpanExporter" + "amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter.OTLPAwsSpanExporter" ) as mock_aws_exporter: with patch( - "amazon.opentelemetry.distro.aws_opentelemetry_configurator.BatchUnsampledSpanProcessor" - ) as mock_processor: + "amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_logger_provider" + ) as mock_logger_provider: with patch( - "amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_logger_provider" - ) as mock_logger_provider: + "amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_aws_session" + ) as mock_session: + mock_session.return_value = MagicMock() os.environ["AGENT_OBSERVABILITY_ENABLED"] = "true" os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = "https://xray.us-east-1.amazonaws.com/v1/traces" @@ -836,13 +837,13 @@ def test_export_unsampled_span_for_agent_observability_uses_aws_exporter(self): # Verify OTLPAwsSpanExporter is created with correct parameters mock_aws_exporter.assert_called_once_with( + session=mock_session.return_value, endpoint="https://xray.us-east-1.amazonaws.com/v1/traces", + aws_region="us-east-1", logger_provider=mock_logger_provider.return_value, ) - # Verify BatchUnsampledSpanProcessor wraps the exporter - mock_processor.assert_called_once_with(span_exporter=mock_aws_exporter.return_value) # Verify processor is added to tracer provider - mock_tracer_provider.add_span_processor.assert_called_once_with(mock_processor.return_value) + mock_tracer_provider.add_span_processor.assert_called_once() # Clean up os.environ.pop("AGENT_OBSERVABILITY_ENABLED", None) @@ -1007,17 +1008,16 @@ def test_validate_and_fetch_logs_header(self): os.environ.pop(OTEL_EXPORTER_OTLP_LOGS_HEADERS, None) @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._validate_and_fetch_logs_header") - @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_installed") - def test_create_emf_exporter(self, mock_is_installed, mock_validate): + @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_aws_session") + def test_create_emf_exporter(self, mock_get_session, mock_validate): # Test when botocore is not installed - mock_is_installed.return_value = False + mock_get_session.return_value = None result = create_emf_exporter() self.assertIsNone(result) - mock_is_installed.assert_called_with("botocore") # Reset mock for subsequent tests - mock_is_installed.reset_mock() - mock_is_installed.return_value = True + mock_get_session.reset_mock() + mock_get_session.return_value = MagicMock() # Mock the EMF exporter class import by patching the module import with patch( @@ -1040,7 +1040,10 @@ def test_create_emf_exporter(self, mock_is_installed, mock_validate): self.assertEqual(result, mock_exporter_instance) # Verify that the EMF exporter was called with correct parameters mock_emf_exporter_class.assert_called_with( - namespace=None, log_group_name="test-group", log_stream_name="test-stream" + session=mock_get_session.return_value, + namespace=None, + log_group_name="test-group", + log_stream_name="test-stream", ) # Test with valid configuration @@ -1050,7 +1053,10 @@ def test_create_emf_exporter(self, mock_is_installed, mock_validate): self.assertEqual(result, mock_exporter_instance) # Verify that the EMF exporter was called with correct parameters mock_emf_exporter_class.assert_called_with( - namespace="test-namespace", log_group_name="test-group", log_stream_name="test-stream" + session=mock_get_session.return_value, 
+ namespace="test-namespace", + log_group_name="test-group", + log_stream_name="test-stream", ) # Test exception handling diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py index adb690359..1079feb4a 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py @@ -9,6 +9,7 @@ from amazon.opentelemetry.distro._utils import ( AGENT_OBSERVABILITY_ENABLED, get_aws_region, + get_aws_session, is_agent_observability_enabled, is_installed, ) @@ -104,67 +105,49 @@ def test_is_agent_observability_enabled_various_values(self): del os.environ[AGENT_OBSERVABILITY_ENABLED] self.assertFalse(is_agent_observability_enabled()) + def test_get_aws_session_with_botocore(self): + """Test get_aws_session when botocore is installed""" + with patch("amazon.opentelemetry.distro._utils.IS_BOTOCORE_INSTALLED", True): + with patch("botocore.session.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + session = get_aws_session() + self.assertEqual(session, mock_session) + mock_session_class.assert_called_once() + + def test_get_aws_session_without_botocore(self): + """Test get_aws_session when botocore is not installed""" + with patch("amazon.opentelemetry.distro._utils.IS_BOTOCORE_INSTALLED", False): + session = get_aws_session() + self.assertIsNone(session) + def test_get_aws_region_with_botocore(self): """Test get_aws_region when botocore is available and returns a region""" - with patch("amazon.opentelemetry.distro._utils.is_installed") as mock_is_installed: - mock_is_installed.return_value = True - - # Create a mock botocore session - mock_session_class = MagicMock() - mock_session_instance = MagicMock() - mock_session_instance.region_name = "us-east-1" - mock_session_class.Session.return_value = mock_session_instance + with patch("amazon.opentelemetry.distro._utils.get_aws_session") as mock_get_session: + mock_session = MagicMock() + mock_session.get_config_variable.return_value = "us-east-1" + mock_get_session.return_value = mock_session - # Patch the import statement directly in the function - with patch.dict("sys.modules", {"botocore": MagicMock(session=mock_session_class)}): - region = get_aws_region() - self.assertEqual(region, "us-east-1") + region = get_aws_region() + self.assertEqual(region, "us-east-1") + mock_session.get_config_variable.assert_called_once_with("region") def test_get_aws_region_without_botocore(self): """Test get_aws_region when botocore is not installed""" - with patch("amazon.opentelemetry.distro._utils.is_installed") as mock_is_installed: - mock_is_installed.return_value = False + with patch("amazon.opentelemetry.distro._utils.get_aws_session") as mock_get_session: + mock_get_session.return_value = None region = get_aws_region() self.assertIsNone(region) def test_get_aws_region_botocore_no_region(self): """Test get_aws_region when botocore is available but returns no region""" - with patch("amazon.opentelemetry.distro._utils.is_installed") as mock_is_installed: - mock_is_installed.return_value = True - - # Create a mock botocore session with no region - mock_session_class = MagicMock() - mock_session_instance = MagicMock() - mock_session_instance.region_name = None - mock_session_class.Session.return_value = mock_session_instance - - # Patch the import statement directly in the function - with 
patch.dict("sys.modules", {"botocore": MagicMock(session=mock_session_class)}): - region = get_aws_region() - self.assertIsNone(region) - - def test_get_aws_region_botocore_import_error(self): - """Test get_aws_region when botocore import fails""" - with patch("amazon.opentelemetry.distro._utils.is_installed") as mock_is_installed: - mock_is_installed.return_value = True - - # Mock ImportError when trying to import botocore - with patch.dict("sys.modules", {"botocore": None}): - with patch("builtins.__import__", side_effect=ImportError("Botocore not found")): - region = get_aws_region() - self.assertIsNone(region) - - def test_get_aws_region_botocore_attribute_error(self): - """Test get_aws_region when botocore has attribute errors""" - with patch("amazon.opentelemetry.distro._utils.is_installed") as mock_is_installed: - mock_is_installed.return_value = True - - # Mock the botocore import with AttributeError on Session - mock_session_module = MagicMock() - mock_session_module.Session.side_effect = AttributeError("Session class not found") - - # Patch the import statement directly in the function - with patch.dict("sys.modules", {"botocore": MagicMock(session=mock_session_module)}): - region = get_aws_region() - self.assertIsNone(region) + with patch("amazon.opentelemetry.distro._utils.get_aws_session") as mock_get_session: + mock_session = MagicMock() + mock_session.get_config_variable.return_value = None + mock_get_session.return_value = mock_session + + region = get_aws_region() + self.assertIsNone(region) + mock_session.get_config_variable.assert_called_once_with("region") From 0b439203267c544370a2ce07df28d277166ec2fc Mon Sep 17 00:00:00 2001 From: liustve Date: Mon, 30 Jun 2025 22:32:59 +0000 Subject: [PATCH 28/52] add explicit setting for aws region from environment variables --- .../src/amazon/opentelemetry/distro/_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index deb652ca3..42e4494a2 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -46,7 +46,11 @@ def get_aws_session(): # pylint: disable=import-outside-toplevel from botocore.session import Session - return Session() + session = Session() + region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") + if region: + session.set_config_variable("region", region) + return session return None From ff2fb5d6cadc1d16e10f96ebb756c5002f75de0c Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 1 Jul 2025 06:39:11 +0000 Subject: [PATCH 29/52] refactored otlp aws log exporter, add comments aws batch log processor --- .../logs/aws_batch_log_record_processor.py | 58 +++++++++--- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 94 ++++++++++--------- .../otlp/aws/traces/otlp_aws_span_exporter.py | 8 ++ .../aws/logs/test_otlp_aws_logs_exporter.py | 48 ++++++++-- 4 files changed, 141 insertions(+), 67 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index e68c61750..9b6761a5b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py
@@ -1,5 +1,6 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License.
 
 import logging
 from typing import Mapping, Optional, Sequence, cast
@@ -19,7 +20,7 @@ class AwsCloudWatchOtlpBatchLogRecordProcessor(BatchLogRecordProcessor):
     Custom implementation of BatchLogRecordProcessor that manages log record batching with size-based constraints
     to prevent exceeding AWS CloudWatch Logs OTLP endpoint request size limits.
 
-    This processor still exports all logs up to _max_export_batch_size but rather than doing exactly
+    This processor still exports all logs up to _MAX_LOG_REQUEST_BYTE_SIZE but rather than doing exactly
     one export, we will estimate log sizes and do multiple batch exports where each exported batch will
     have an additional constraint:
 
@@ -29,9 +30,41 @@ class AwsCloudWatchOtlpBatchLogRecordProcessor(BatchLogRecordProcessor):
     A unique case is if the sub-batch is of data size > 1 MB, then the sub-batch will have exactly 1 log in it.
     """
 
-    _BASE_LOG_BUFFER_BYTE_SIZE = (
-        1000  # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation
-    )
+    # OTel log events include fixed metadata attributes, so the metadata size can be
+    # estimated on a best-effort basis as:
+    # service.name (255 chars) + cloud.resource_id (max ARN length) + telemetry.xxx (~20 chars) +
+    # common attributes (255 chars) +
+    # scope + flags + traceId + spanId + numeric/timestamp fields + ...
+    # Example log structure:
+    # {
+    #     "resource": {
+    #         "attributes": {
+    #             "aws.local.service": "example-service123",
+    #             "telemetry.sdk.language": "python",
+    #             "service.name": "my-application",
+    #             "cloud.resource_id": "example-resource",
+    #             "aws.log.group.names": "example-log-group",
+    #             "aws.ai.agent.type": "default",
+    #             "telemetry.sdk.version": "1.x.x",
+    #             "telemetry.auto.version": "0.x.x",
+    #             "telemetry.sdk.name": "opentelemetry"
+    #         }
+    #     },
+    #     "scope": {"name": "example.instrumentation.library"},
+    #     "timeUnixNano": 1234567890123456789,
+    #     "observedTimeUnixNano": 1234567890987654321,
+    #     "severityNumber": 9,
+    #     "body": {...},
+    #     "attributes": {...},
+    #     "flags": 1,
+    #     "traceId": "abcd1234efgh5678ijkl9012mnop3456",
+    #     "spanId": "1234abcd5678efgh"
+    # }
+    # 2000 might be a bit of an overestimate, but it is better to overestimate the size of the log
+    # and suffer a small performance impact with batching than it is to underestimate and risk
+    # a large log being dropped when sent to the AWS OTLP endpoint.
+    _BASE_LOG_BUFFER_BYTE_SIZE = 2000
+
     _MAX_LOG_REQUEST_BYTE_SIZE = (
         1048576  # Maximum uncompressed/unserialized bytes / request -
         # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html
@@ -63,10 +96,11 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None:
         https://github.com/open-telemetry/opentelemetry-python/blob/bb21ebd46d070c359eee286c97bdf53bfd06759d/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py#L143
 
         Preserves existing batching behavior but will export smaller intermediate log batches if
-        the size of the data in the batch is at or above AWS CloudWatch's
+        the size of the data in the batch is estimated to be at or above AWS CloudWatch's
+        maximum request size limit of 1 MB.
- - Data size of exported batches will ALWAYS be <= 1 MB except for the case below: - - If the data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1 + - Estimated data size of exported batches will typically be <= 1 MB except for the case below: + - If the estimated data size of an exported batch is ever > 1 MB then the batch size is guaranteed to be 1 """ with self._export_lock: iteration = 0 @@ -141,19 +175,17 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di if next_val is None: continue - if isinstance(next_val, bool): - size += 4 if next_val else 5 - continue - if isinstance(next_val, (str, bytes)): size += len(next_val) continue - if isinstance(next_val, (float, int)): + if isinstance(next_val, (float, int, bool)): size += len(str(next_val)) continue - # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"], + # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"] + # See: https://github.com/open-telemetry/opentelemetry-python/blob/\ + # 9426d6da834cfb4df7daedd4426bba0aa83165b5/opentelemetry-api/src/opentelemetry/util/types.py#L20 if current_depth <= depth: obj_id = id( next_val diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index f5aa32460..772240d05 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -1,10 +1,12 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. import gzip import logging +import random from io import BytesIO -from time import sleep +from time import sleep, time from typing import Dict, Optional, Sequence from requests import Response @@ -12,7 +14,6 @@ from requests.structures import CaseInsensitiveDict from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession -from opentelemetry.exporter.otlp.proto.common._internal import _create_exp_backoff_generator from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs from opentelemetry.exporter.otlp.proto.http import Compression from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter @@ -20,9 +21,17 @@ from opentelemetry.sdk._logs.export import LogExportResult _logger = logging.getLogger(__name__) +_MAX_RETRYS = 6 class OTLPAwsLogExporter(OTLPLogExporter): + """ + This exporter extends the functionality of the OTLPLogExporter to allow logs to be exported + to the CloudWatch Logs OTLP endpoint https://logs.[AWSRegion].amazonaws.com/v1/logs. Utilizes the aws-sdk + library to sign and directly inject SigV4 Authentication to the exported request's headers. + + See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html + """ _RETRY_AFTER_HEADER = "Retry-After" # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling @@ -56,12 +65,13 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: """ Exports log batch with AWS-specific enhancements over the base OTLPLogExporter. 
- Based on upstream implementation which does not retry based on Retry-After header: - https://github.com/open-telemetry/opentelemetry-python/blob/acae2c232b101d3e447a82a7161355d66aa06fa2/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 + Key differences from upstream OTLPLogExporter: + 1. Respects Retry-After header from server responses for proper throttling + 2. Treats HTTP 429 (Too Many Requests) as a retryable exception + 3. Always compresses data with gzip before sending - Key behaviors: - 1. Always compresses data with gzip before sending - 2. Implements Retry-After header support for throttling responses + Upstream implementation does not support Retry-After header: + https://github.com/open-telemetry/opentelemetry-python/blob/acae2c232b101d3e447a82a7161355d66aa06fa2/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py#L167 """ if self._shutdown: @@ -74,52 +84,50 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: gzip_stream.write(serialized_data) data = gzip_data.getvalue() - backoff = _create_exp_backoff_generator(max_value=self._MAX_RETRY_TIMEOUT) + deadline_sec = time() + self._timeout + retry_num = 0 # This loop will eventually terminate because: # 1) The export request will eventually either succeed or fail permanently - # 2) The exponential backoff generator has a max value of _MAX_RETRY_TIMEOUT (64s) - # 3) After enough retries, delay will equal _MAX_RETRY_TIMEOUT, forcing exit + # 2) Maximum retries (_MAX_RETRYS = 6) will be reached + # 3) Deadline timeout will be exceeded # 4) Non-retryable errors (4xx except 429) immediately exit the loop while True: - resp = self._send(data) + resp = self._send(data, deadline_sec - time()) if resp.ok: return LogExportResult.SUCCESS - delay = self._get_retry_delay_sec(resp.headers, backoff) + backoff_seconds = self._get_retry_delay_sec(resp.headers, retry_num) is_retryable = self._retryable(resp) - if not is_retryable or delay == self._MAX_RETRY_TIMEOUT: - if is_retryable: - _logger.error( - "Failed to export logs due to retries exhausted " - "after transient error %s encountered while exporting logs batch", - resp.reason, - ) - else: - _logger.error( - "Failed to export logs batch code: %s, reason: %s", - resp.status_code, - resp.text, - ) + if not is_retryable or retry_num + 1 == _MAX_RETRYS or backoff_seconds > (deadline_sec - time()): + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) return LogExportResult.FAILURE _logger.warning( - "Transient error %s encountered while exporting logs batch, retrying in %ss.", + "Transient error %s encountered while exporting logs batch, retrying in %.2fs.", resp.reason, - delay, + backoff_seconds, ) - sleep(delay) + # Make sleep interruptible by checking shutdown status + if self._shutdown: + return LogExportResult.FAILURE + sleep(backoff_seconds) + retry_num += 1 - def _send(self, serialized_data: bytes): + def _send(self, serialized_data: bytes, timeout_sec: float): try: response = self._session.post( url=self._endpoint, data=serialized_data, verify=self._certificate_file, - timeout=self._timeout, + timeout=timeout_sec, cert=self._client_cert, ) return response @@ -128,7 +136,7 @@ def _send(self, serialized_data: bytes): url=self._endpoint, data=serialized_data, verify=self._certificate_file, - timeout=self._timeout, + timeout=timeout_sec, cert=self._client_cert, ) return response @@ -136,29 
+144,23 @@ def _send(self, serialized_data: bytes): @staticmethod def _retryable(resp: Response) -> bool: """ - Is it a retryable response? + Logic based on https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling """ # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling return resp.status_code in (429, 503) or OTLPLogExporter._retryable(resp) - def _get_retry_delay_sec(self, headers: CaseInsensitiveDict, backoff) -> float: + def _get_retry_delay_sec(self, headers: CaseInsensitiveDict, retry_num: int) -> float: """ Get retry delay in seconds from headers or backoff strategy. """ - # See: https://opentelemetry.io/docs/specs/otlp/#otlphttp-throttling - maybe_retry_after = headers.get(self._RETRY_AFTER_HEADER, None) - - # Set the next retry delay to the value of the Retry-After response in the headers. - # If Retry-After is not present in the headers, default to the next iteration of the - # exponential backoff strategy. - - delay = self._parse_retryable_header(maybe_retry_after) - - if delay == -1: - delay = next(backoff, self._MAX_RETRY_TIMEOUT) - - return delay + # Check for Retry-After header first, then use exponential backoff with jitter + retry_after_delay = self._parse_retryable_header(headers.get(self._RETRY_AFTER_HEADER)) + if retry_after_delay > -1: + return retry_after_delay + else: + # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff. + return 2**retry_num * random.uniform(0.8, 1.2) @staticmethod def _parse_retryable_header(retry_header: Optional[str]) -> float: diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py index 7c608e885..7f44b04e4 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py @@ -18,6 +18,14 @@ class OTLPAwsSpanExporter(OTLPSpanExporter): + """ + This exporter extends the functionality of the OTLPSpanExporter to allow spans to be exported + to the XRay OTLP endpoint https://xray.[AWSRegion].amazonaws.com/v1/traces. Utilizes the + AwsAuthSession to sign and directly inject SigV4 Authentication to the exported request's headers. 
+ + See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html + """ + def __init__( self, endpoint: Optional[str] = None, diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 31e401643..6bc7d0ff4 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -7,7 +7,7 @@ import requests from requests.structures import CaseInsensitiveDict -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import _MAX_RETRYS, OTLPAwsLogExporter from opentelemetry._logs.severity import SeverityNumber from opentelemetry.sdk._logs import LogData, LogRecord from opentelemetry.sdk._logs.export import LogExportResult @@ -80,18 +80,21 @@ def test_should_not_export_again_if_not_retryable(self, mock_request): def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_sleep): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. But there is no Retry-After header.""" + self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) - # 1, 2, 4, 8, 16, 32 delays - self.assertEqual(mock_sleep.call_count, 6) + self.assertEqual(mock_sleep.call_count, _MAX_RETRYS - 1) delays = mock_sleep.call_args_list for index, delay in enumerate(delays): - self.assertEqual(delay[0][0], 2**index) + expected_base = 2**index + actual_delay = delay[0][0] + # Assert delay is within jitter range: base * [0.8, 1.2] + self.assertGreaterEqual(actual_delay, expected_base * 0.8) + self.assertLessEqual(actual_delay, expected_base * 1.2) - # Number of calls: 1 + len(1, 2, 4, 8, 16, 32 delays) - self.assertEqual(mock_request.call_count, 7) + self.assertEqual(mock_request.call_count, _MAX_RETRYS) self.assertEqual(result, LogExportResult.FAILURE) @patch( @@ -104,6 +107,7 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_sleep): """Tests that multiple export requests are made with the server's suggested delay if the response status code is retryable and there is a Retry-After header.""" + self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) delays = mock_sleep.call_args_list @@ -130,12 +134,17 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after self, mock_request, mock_sleep ): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. 
- but the Retry-After header ins invalid or malformed.""" + but the Retry-After header is invalid or malformed.""" + self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) delays = mock_sleep.call_args_list for index, delay in enumerate(delays): - self.assertEqual(delay[0][0], 2**index) + expected_base = 2**index + actual_delay = delay[0][0] + # Assert delay is within jitter range: base * [0.8, 1.2] + self.assertGreaterEqual(actual_delay, expected_base * 0.8) + self.assertLessEqual(actual_delay, expected_base * 1.2) self.assertEqual(mock_sleep.call_count, 3) self.assertEqual(mock_request.call_count, 4) @@ -149,6 +158,29 @@ def test_export_connection_error_retry(self, mock_request): self.assertEqual(mock_request.call_count, 2) self.assertEqual(result, LogExportResult.SUCCESS) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None + ) + @patch("requests.Session.post", return_value=retryable_response_no_header) + def test_should_stop_retrying_when_deadline_exceeded(self, mock_request, mock_sleep): + """Tests that the exporter stops retrying when the deadline is exceeded.""" + self.exporter._timeout = 5 # Short timeout to trigger deadline check + + # Mock time to simulate time passing + with patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.time") as mock_time: + # First call returns start time, subsequent calls simulate time passing + mock_time.side_effect = [0, 0, 1, 2, 4, 8] # Exponential backoff would be 1, 2, 4 seconds + + result = self.exporter.export(self.logs) + + # Should stop before max retries due to deadline + self.assertLess(mock_sleep.call_count, _MAX_RETRYS) + self.assertLess(mock_request.call_count, _MAX_RETRYS + 1) + self.assertEqual(result, LogExportResult.FAILURE) + + # Verify total time passed is at the timeout limit + self.assertGreaterEqual(5, self.exporter._timeout) + @staticmethod def generate_test_log_data(count=5): logs = [] From bce91dc856146af7743f74df0f25640166495621 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 1 Jul 2025 18:14:09 +0000 Subject: [PATCH 30/52] linting fix --- .../distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 772240d05..d08288ec1 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -158,9 +158,8 @@ def _get_retry_delay_sec(self, headers: CaseInsensitiveDict, retry_num: int) -> retry_after_delay = self._parse_retryable_header(headers.get(self._RETRY_AFTER_HEADER)) if retry_after_delay > -1: return retry_after_delay - else: - # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff. - return 2**retry_num * random.uniform(0.8, 1.2) + # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff. 
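+        # e.g. retry_num=3 gives a base delay of 2**3 = 8s, jittered into the range [6.4s, 9.6s].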
+ return 2**retry_num * random.uniform(0.8, 1.2) @staticmethod def _parse_retryable_header(retry_header: Optional[str]) -> float: From 76e4b47f1485ebae840b5a25406a52f8a41a66dd Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 1 Jul 2025 18:23:12 +0000 Subject: [PATCH 31/52] remove shut down check before sleep --- .../distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index d08288ec1..fe95e507a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -114,10 +114,6 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: resp.reason, backoff_seconds, ) - - # Make sleep interruptible by checking shutdown status - if self._shutdown: - return LogExportResult.FAILURE sleep(backoff_seconds) retry_num += 1 From f0ebea203f8664fc33b58f72544c513083592b89 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 1 Jul 2025 18:45:10 +0000 Subject: [PATCH 32/52] linting fix --- .../otlp/aws/logs/aws_batch_log_record_processor.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index afa715299..3a20c4366 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -175,19 +175,17 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di if next_val is None: continue - if isinstance(next_val, bool): - size += 4 if next_val else 5 - continue - if isinstance(next_val, (str, bytes)): size += len(next_val) continue - if isinstance(next_val, (float, int)): + if isinstance(next_val, (float, int, bool)): size += len(str(next_val)) continue - # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"], + # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"] + # See: https://github.com/open-telemetry/opentelemetry-python/blob/\ + # 9426d6da834cfb4df7daedd4426bba0aa83165b5/opentelemetry-api/src/opentelemetry/util/types.py#L20 if current_depth <= depth: obj_id = id( next_val From 6dd6a6733f7362bc27deb92211096152a7a5c154 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 19:23:46 +0000 Subject: [PATCH 33/52] add better estimation for non-ascii characters --- .../logs/aws_batch_log_record_processor.py | 20 ++++++++++++++++--- .../test_aws_batch_log_record_processor.py | 4 ++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index 9b6761a5b..de94bfcaf 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -175,12 +175,12 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di if next_val is None: continue - if isinstance(next_val, (str, bytes)): + if isinstance(next_val, bytes): size += len(next_val) continue - if isinstance(next_val, (float, int, bool)): - size += len(str(next_val)) + if isinstance(next_val, (str, float, int, bool)): + size += AwsCloudWatchOtlpBatchLogRecordProcessor._estimate_utf8_size(str(next_val)) continue # next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"] @@ -210,3 +210,17 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di queue = new_queue return size + + @staticmethod + def _estimate_utf8_size(s: str): + ascii_count = 0 + non_ascii_count = 0 + + for char in s: + if ord(char) < 128: + ascii_count += 1 + else: + non_ascii_count += 1 + + # Estimate: ASCII chars (1 byte) + upper bound of non-ASCII chars 4 bytes + return ascii_count + (non_ascii_count * 4) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 3a7bb5ef9..e8cb060c8 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -116,8 +116,8 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): def test_process_log_data_primitive(self): - primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None] - expected_sizes = [4, 4, 1, 3, 4, 5, 0] + primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"] + expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")] for index, primitive in enumerate(primitives): log = self.generate_test_log_data(log_body=primitive, count=1) From 502eb01b1c55293395db97755b9c2a374c32e5c6 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 19:51:13 +0000 Subject: [PATCH 34/52] linting + formatting fix --- .../logs/aws_batch_log_record_processor.py | 35 +++++++++++++++---- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 4 +-- .../otlp/aws/common/test_aws_auth_session.py | 15 -------- .../test_aws_batch_log_record_processor.py | 2 +- .../test_aws_opentelementry_configurator.py | 21 +++++++++++ 5 files changed, 53 insertions(+), 24 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py index de94bfcaf..046d98e2a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py @@ -1,6 +1,6 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 -# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. +# Modifications Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. import logging from typing import Mapping, Optional, Sequence, cast @@ -136,10 +136,33 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di """ Estimates the size in bytes of a log by calculating the size of its body and its attributes and adding a buffer amount to account for other log metadata information. - Will process complex log structures up to the specified depth limit. - Includes cycle detection to prevent processing the log content more than once. - If the depth limit of the log structure is exceeded, returns the truncated calculation - to everything up to that point. + + Features: + - Processes complex log structures up to the specified depth limit + - Includes cycle detection to prevent processing the same content more than once + - Returns truncated calculation if depth limit is exceeded + + We set depth to 3 as this is the minimum required depth to estimate our consolidated Gen AI log events: + + Example structure: + { + "output": { + "messages": [ + { + "content": "Hello, World!", + "role": "assistant" + } + ] + }, + "input": { + "messages": [ + { + "content": "Say Hello, World!", + "role": "user" + } + ] + } + } Args: log: The Log object to calculate size for diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index fe95e507a..9467bb681 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -1,6 +1,6 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 -# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. +# Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
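# A sketch of this exporter's compression step (illustrative, inferred from the gzip/BytesIO imports below and the upstream-collector default noted later in this series; not the file's verbatim code). Each serialized payload is gzip-compressed before being POSTed:
#     buf = BytesIO()
#     with gzip.GzipFile(fileobj=buf, mode="w") as gz:
#         gz.write(serialized_data)
#     body = buf.getvalue()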
import gzip import logging diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index 85a6c8958..7d6479251 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -63,18 +63,3 @@ def test_aws_auth_session(self, _, __): self.assertIn(AUTHORIZATION_HEADER, actual_headers) self.assertIn(X_AMZ_DATE_HEADER, actual_headers) self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) - - @patch("requests.Session.request", return_value=requests.Response()) - @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) - @patch("botocore.auth.SigV4Auth.add_auth", side_effect=Exception("Signing failed")) - def test_aws_auth_session_signing_error(self, mock_add_auth, mock_get_credentials, mock_request): - """Tests that aws_auth_session does not any Sigv4 headers if signing errors.""" - - session = AwsAuthSession("us-east-1", "xray") - actual_headers = {"test": "test"} - - session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) - - self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) - self.assertNotIn(X_AMZ_DATE_HEADER, actual_headers) - self.assertNotIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index e8cb060c8..8317938fd 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -117,7 +117,7 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): def test_process_log_data_primitive(self): primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"] - expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")] + expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("calf")] for index, primitive in enumerate(primitives): log = self.generate_test_log_data(log_body=primitive, count=1) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 6ce9a72f2..248c3546e 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -26,6 +26,7 @@ OtlpLogHeaderSetting, _check_emf_exporter_enabled, _custom_import_sampler, + _customize_log_record_processor, _customize_logs_exporter, _customize_metric_exporters, _customize_resource, @@ -1009,6 +1010,26 @@ def test_validate_and_fetch_logs_header(self): # Clean up os.environ.pop(OTEL_EXPORTER_OTLP_LOGS_HEADERS, None) + @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_agent_observability_enabled") + 
@patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._is_aws_otlp_endpoint") + def test_customize_log_record_processor_with_agent_observability(self, mock_is_aws_endpoint, mock_is_agent_enabled): + """Test that AwsCloudWatchOtlpBatchLogRecordProcessor is used when agent observability is enabled and endpoint is logs endpoint""" + from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsCloudWatchOtlpBatchLogRecordProcessor, + ) + from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter + from opentelemetry.sdk._logs.export import BatchLogRecordProcessor + + # Mock the OTLPAwsLogExporter + mock_exporter = MagicMock(spec=OTLPAwsLogExporter) + + # Test case 1: Agent observability enabled and AWS logs endpoint + mock_is_agent_enabled.return_value = True + mock_is_aws_endpoint.return_value = True + + processor = _customize_log_record_processor(mock_exporter) + self.assertIsInstance(processor, AwsCloudWatchOtlpBatchLogRecordProcessor) + @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._validate_and_fetch_logs_header") @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_installed") def test_create_emf_exporter(self, mock_is_installed, mock_validate): From b30ad4fc0255cef00a06d4c87a0c4210de1690fe Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 20:01:14 +0000 Subject: [PATCH 35/52] fix unit test --- .../otlp/aws/logs/test_aws_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py index 8317938fd..e8cb060c8 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py @@ -117,7 +117,7 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): def test_process_log_data_primitive(self): primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"] - expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("calf")] + expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")] for index, primitive in enumerate(primitives): log = self.generate_test_log_data(log_body=primitive, count=1) From 8b7e671d3ce42f3cccfc26c5b95f0aee26df0c8c Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 20:02:54 +0000 Subject: [PATCH 36/52] linting fix --- .../distro/test_aws_opentelementry_configurator.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 248c3546e..da1e5fbdc 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -47,6 +47,9 @@ from amazon.opentelemetry.distro.aws_span_metrics_processor import AwsSpanMetricsProcessor from 
amazon.opentelemetry.distro.exporter.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession +from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( + AwsCloudWatchOtlpBatchLogRecordProcessor, +) from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter @@ -1014,12 +1017,6 @@ def test_validate_and_fetch_logs_header(self): @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._is_aws_otlp_endpoint") def test_customize_log_record_processor_with_agent_observability(self, mock_is_aws_endpoint, mock_is_agent_enabled): """Test that AwsCloudWatchOtlpBatchLogRecordProcessor is used when agent observability is enabled and endpoint is logs endpoint""" - from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( - AwsCloudWatchOtlpBatchLogRecordProcessor, - ) - from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter - from opentelemetry.sdk._logs.export import BatchLogRecordProcessor - # Mock the OTLPAwsLogExporter mock_exporter = MagicMock(spec=OTLPAwsLogExporter) From dc98cf80d2ad53aae8c2629d351131a41e8b7229 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 20:43:54 +0000 Subject: [PATCH 37/52] add interruptible shutdown --- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 15 ++++- .../aws/logs/test_otlp_aws_logs_exporter.py | 57 +++++++++++-------- 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 9467bb681..737fafdb8 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -6,7 +6,8 @@ import logging import random from io import BytesIO -from time import sleep, time +from threading import Event +from time import time from typing import Dict, Optional, Sequence from requests import Response @@ -60,6 +61,7 @@ def __init__( compression=Compression.Gzip, session=AwsAuthSession(aws_region=self._aws_region, service="logs"), ) + self._shutdown_event = Event() def export(self, batch: Sequence[LogData]) -> LogExportResult: """ @@ -114,9 +116,18 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: resp.reason, backoff_seconds, ) - sleep(backoff_seconds) + # Use interruptible sleep that can be interrupted by shutdown + if self._shutdown_event.wait(backoff_seconds): + _logger.info("Export interrupted by shutdown") + return LogExportResult.FAILURE + retry_num += 1 + def shutdown(self) -> None: + """Shutdown the exporter and interrupt any ongoing waits.""" + self._shutdown_event.set() + return super().shutdown() + def _send(self, serialized_data: bytes, timeout_sec: float): try: response = self._session.post( diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py 
b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 6bc7d0ff4..f3e587e02 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -73,19 +73,17 @@ def test_should_not_export_again_if_not_retryable(self, mock_request): self.assertEqual(result, LogExportResult.FAILURE) - @patch( - "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None - ) + @patch("threading.Event.wait", side_effect=lambda x: False) @patch("requests.Session.post", return_value=retryable_response_no_header) - def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_sleep): + def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_wait): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. But there is no Retry-After header.""" self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) - self.assertEqual(mock_sleep.call_count, _MAX_RETRYS - 1) + self.assertEqual(mock_wait.call_count, _MAX_RETRYS - 1) - delays = mock_sleep.call_args_list + delays = mock_wait.call_args_list for index, delay in enumerate(delays): expected_base = 2**index @@ -97,30 +95,26 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header self.assertEqual(mock_request.call_count, _MAX_RETRYS) self.assertEqual(result, LogExportResult.FAILURE) - @patch( - "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None - ) + @patch("threading.Event.wait", side_effect=lambda x: False) @patch( "requests.Session.post", side_effect=[retryable_response_header, retryable_response_header, retryable_response_header, good_response], ) - def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_sleep): + def test_should_export_again_with_server_delay_if_retryable_and_retry_after_header(self, mock_request, mock_wait): """Tests that multiple export requests are made with the server's suggested delay if the response status code is retryable and there is a Retry-After header.""" self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) - delays = mock_sleep.call_args_list + delays = mock_wait.call_args_list for delay in delays: self.assertEqual(delay[0][0], 10) - self.assertEqual(mock_sleep.call_count, 3) + self.assertEqual(mock_wait.call_count, 3) self.assertEqual(mock_request.call_count, 4) self.assertEqual(result, LogExportResult.SUCCESS) - @patch( - "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None - ) + @patch("threading.Event.wait", side_effect=lambda x: False) @patch( "requests.Session.post", side_effect=[ @@ -131,13 +125,13 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head ], ) def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after_header( - self, mock_request, mock_sleep + self, mock_request, mock_wait ): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. 
but the Retry-After header is invalid or malformed.""" self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) - delays = mock_sleep.call_args_list + delays = mock_wait.call_args_list for index, delay in enumerate(delays): @@ -146,7 +140,7 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after self.assertGreaterEqual(actual_delay, expected_base * 0.8) self.assertLessEqual(actual_delay, expected_base * 1.2) - self.assertEqual(mock_sleep.call_count, 3) + self.assertEqual(mock_wait.call_count, 3) self.assertEqual(mock_request.call_count, 4) self.assertEqual(result, LogExportResult.SUCCESS) @@ -158,15 +152,12 @@ def test_export_connection_error_retry(self, mock_request): self.assertEqual(mock_request.call_count, 2) self.assertEqual(result, LogExportResult.SUCCESS) - @patch( - "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.sleep", side_effect=lambda x: None - ) + @patch("threading.Event.wait", side_effect=lambda x: False) @patch("requests.Session.post", return_value=retryable_response_no_header) - def test_should_stop_retrying_when_deadline_exceeded(self, mock_request, mock_sleep): + def test_should_stop_retrying_when_deadline_exceeded(self, mock_request, mock_wait): """Tests that the exporter stops retrying when the deadline is exceeded.""" self.exporter._timeout = 5 # Short timeout to trigger deadline check - # Mock time to simulate time passing with patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.time") as mock_time: # First call returns start time, subsequent calls simulate time passing mock_time.side_effect = [0, 0, 1, 2, 4, 8] # Exponential backoff would be 1, 2, 4 seconds @@ -174,13 +165,31 @@ def test_should_stop_retrying_when_deadline_exceeded(self, mock_request, mock_sl result = self.exporter.export(self.logs) # Should stop before max retries due to deadline - self.assertLess(mock_sleep.call_count, _MAX_RETRYS) + self.assertLess(mock_wait.call_count, _MAX_RETRYS) self.assertLess(mock_request.call_count, _MAX_RETRYS + 1) self.assertEqual(result, LogExportResult.FAILURE) # Verify the simulated elapsed time (8s) reached the configured 5s timeout self.assertGreaterEqual(8, self.exporter._timeout) + @patch("requests.Session.post", return_value=retryable_response_no_header) + def test_export_interrupted_by_shutdown(self, mock_request): + """Tests that export can be interrupted by shutdown during retry wait.""" + self.exporter._timeout = 10000 + + # Mock Event.wait to call shutdown on first call, then return True (interrupted) + # We cannot call shutdown() at the beginning since the exporter would just automatically return a FAILURE result without even attempting the export.
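+ # For context (standard threading.Event semantics, nothing test-specific): Event.wait(t) returns True as soon as Event.set() has been called and False only if the timeout elapses first, so a wait side effect that calls shutdown() and returns True models a shutdown arriving mid-backoff.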
+ def mock_wait_with_shutdown(timeout): + self.exporter.shutdown() + return True + + with patch.object(self.exporter._shutdown_event, 'wait', side_effect=mock_wait_with_shutdown): + result = self.exporter.export(self.logs) + + # Should make one request, then get interrupted during retry wait + self.assertEqual(mock_request.call_count, 1) + self.assertEqual(result, LogExportResult.FAILURE) + @staticmethod def generate_test_log_data(count=5): logs = [] From 3450a114b95a6976808968e23b8d56200df4c721 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 21:34:56 +0000 Subject: [PATCH 38/52] fix sleep unit tests + renaming aws batch log processor --- .../distro/aws_opentelemetry_configurator.py | 2 +- ...aws_cw_otlp_batch_log_record_processor.py} | 4 +- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 6 +-- ...aws_cw_otlp_batch_log_record_processor.py} | 20 ++++---- .../aws/logs/test_otlp_aws_logs_exporter.py | 48 ++++++++++++------- .../test_aws_opentelementry_configurator.py | 2 +- 6 files changed, 47 insertions(+), 35 deletions(-) rename aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/{aws_batch_log_record_processor.py => _aws_cw_otlp_batch_log_record_processor.py} (98%) rename aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/{test_aws_batch_log_record_processor.py => test_aws_cw_otlp_batch_log_record_processor.py} (90%) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 2d7c603cf..2710996bc 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -23,7 +23,7 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( +from amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( AwsCloudWatchOtlpBatchLogRecordProcessor, ) from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/_aws_cw_otlp_batch_log_record_processor.py similarity index 98% rename from aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py rename to aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/_aws_cw_otlp_batch_log_record_processor.py index 046d98e2a..5ad3c2c8e 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/_aws_cw_otlp_batch_log_record_processor.py @@ -1,6 +1,6 @@ -# Copyright The OpenTelemetry Authors +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -# Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License.
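# Rough sketch of the batching rule this renamed processor applies (illustrative, inferred from the surrounding tests rather than quoted from the implementation): each log is sized via _estimate_log_size plus the 2 KB metadata buffer, logs are drained in order, and a new sub-batch is cut whenever the running total would cross the 1 MB _MAX_LOG_REQUEST_BYTE_SIZE, so a single over-limit log is exported in a batch of its own.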
import logging from typing import Mapping, Optional, Sequence, cast diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 737fafdb8..16a976d54 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -1,6 +1,6 @@ -# Copyright The OpenTelemetry Authors +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -# Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. import gzip import logging @@ -120,7 +120,7 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: if self._shutdown_event.wait(backoff_seconds): _logger.info("Export interrupted by shutdown") return LogExportResult.FAILURE - + retry_num += 1 def shutdown(self) -> None: diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py similarity index 90% rename from aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py rename to aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py index e8cb060c8..adfa1eecc 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py @@ -5,7 +5,7 @@ from typing import List from unittest.mock import MagicMock, patch -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( +from amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( AwsCloudWatchOtlpBatchLogRecordProcessor, BatchLogExportStrategy, ) @@ -136,11 +136,11 @@ def test_process_log_data_with_cycle(self): self.assertEqual(actual_size, expected_size) @patch( - "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + "amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.attach", return_value=MagicMock(), ) - @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") - @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.set_value") def test_export_single_batch_under_size_limit(self, _, __, ___): """Tests that export is only called once if a single batch is under the size limit""" log_count = 10 @@ -163,11 +163,11 @@ def test_export_single_batch_under_size_limit(self, _, __, ___): self.mock_exporter.export.assert_called_once() @patch( - 
"amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + "amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.attach", return_value=MagicMock(), ) - @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") - @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.set_value") def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): """Should make multiple export calls of batch size 1 to export logs of size > 1 MB.""" @@ -188,11 +188,11 @@ def test_export_single_batch_all_logs_over_size_limit(self, _, __, ___): self.assertEqual(len(batch[0]), 1) @patch( - "amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.attach", + "amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.attach", return_value=MagicMock(), ) - @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.detach") - @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor.set_value") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.detach") + @patch("amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor.set_value") def test_export_single_batch_some_logs_over_size_limit(self, _, __, ___): """Should make calls to export smaller sub-batch logs""" large_log_body = "X" * (self.processor._MAX_LOG_REQUEST_BYTE_SIZE + 1) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index f3e587e02..8623a6696 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -73,7 +73,10 @@ def test_should_not_export_again_if_not_retryable(self, mock_request): self.assertEqual(result, LogExportResult.FAILURE) - @patch("threading.Event.wait", side_effect=lambda x: False) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.Event.wait", + side_effect=lambda x: False, + ) @patch("requests.Session.post", return_value=retryable_response_no_header) def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header(self, mock_request, mock_wait): """Tests that multiple export requests are made with exponential delay if the response status code is retryable. 
@@ -95,7 +98,10 @@ def test_should_export_again_with_backoff_if_retryable_and_no_retry_after_header self.assertEqual(mock_request.call_count, _MAX_RETRYS) self.assertEqual(result, LogExportResult.FAILURE) - @patch("threading.Event.wait", side_effect=lambda x: False) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.Event.wait", + side_effect=lambda x: False, + ) @patch( "requests.Session.post", side_effect=[retryable_response_header, retryable_response_header, retryable_response_header, good_response], @@ -105,6 +111,7 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head delay if the response status code is retryable and there is a Retry-After header.""" self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) + delays = mock_wait.call_args_list for delay in delays: @@ -114,7 +121,10 @@ def test_should_export_again_with_server_delay_if_retryable_and_retry_after_head self.assertEqual(mock_request.call_count, 4) self.assertEqual(result, LogExportResult.SUCCESS) - @patch("threading.Event.wait", side_effect=lambda x: False) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.Event.wait", + side_effect=lambda x: False, + ) @patch( "requests.Session.post", side_effect=[ @@ -131,6 +141,7 @@ def test_should_export_again_with_backoff_delay_if_retryable_and_bad_retry_after but the Retry-After header is invalid or malformed.""" self.exporter._timeout = 10000 # Large timeout to avoid early exit result = self.exporter.export(self.logs) + delays = mock_wait.call_args_list for index, delay in enumerate(delays): @@ -152,7 +163,10 @@ def test_export_connection_error_retry(self, mock_request): self.assertEqual(mock_request.call_count, 2) self.assertEqual(result, LogExportResult.SUCCESS) - @patch("threading.Event.wait", side_effect=lambda x: False) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.Event.wait", + side_effect=lambda x: False, + ) @patch("requests.Session.post", return_value=retryable_response_no_header) def test_should_stop_retrying_when_deadline_exceeded(self, mock_request, mock_wait): """Tests that the exporter stops retrying when the deadline is exceeded.""" @@ -172,23 +186,21 @@ def test_should_stop_retrying_when_deadline_exceeded(self, mock_request, mock_wa # Verify the simulated elapsed time (8s) reached the configured 5s timeout self.assertGreaterEqual(8, self.exporter._timeout) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.Event.wait", + side_effect=lambda x: True, + ) @patch("requests.Session.post", return_value=retryable_response_no_header) - def test_export_interrupted_by_shutdown(self, mock_request): + def test_export_interrupted_by_shutdown(self, mock_request, mock_wait): """Tests that export can be interrupted by shutdown during retry wait.""" self.exporter._timeout = 10000 - - # Mock Event.wait to call shutdown on first call, then return True (interrupted) - # We cannot call shutdown() at the beginning since the exporter would just automatically return a FAILURE result without even attempting the export.
- def mock_wait_with_shutdown(timeout): - self.exporter.shutdown() - return True - - with patch.object(self.exporter._shutdown_event, 'wait', side_effect=mock_wait_with_shutdown): - result = self.exporter.export(self.logs) - - # Should make one request, then get interrupted during retry wait - self.assertEqual(mock_request.call_count, 1) - self.assertEqual(result, LogExportResult.FAILURE) + + result = self.exporter.export(self.logs) + + # Should make one request, then get interrupted during retry wait + self.assertEqual(mock_request.call_count, 1) + self.assertEqual(result, LogExportResult.FAILURE) + mock_wait.assert_called_once() @staticmethod def generate_test_log_data(count=5): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index da1e5fbdc..57332c40b 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -47,7 +47,7 @@ from amazon.opentelemetry.distro.aws_span_metrics_processor import AwsSpanMetricsProcessor from amazon.opentelemetry.distro.exporter.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.aws_batch_log_record_processor import ( +from amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( AwsCloudWatchOtlpBatchLogRecordProcessor, ) from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter From 7a83e92d71d18740527a8c326aae41a5185b1665 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 21:38:58 +0000 Subject: [PATCH 39/52] linting fix --- .../aws/logs/test_aws_cw_otlp_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py index adfa1eecc..bbd63ccb6 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py @@ -117,7 +117,7 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): def test_process_log_data_primitive(self): primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"] - expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")] + expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("calf")] for index, primitive in enumerate(primitives): log = self.generate_test_log_data(log_body=primitive, count=1) From a38d43daeabc90fc00d08c6a804331fe0bc19261 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 21:41:52 +0000 Subject: [PATCH 40/52] fix test --- .../aws/logs/test_aws_cw_otlp_batch_log_record_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py index bbd63ccb6..adfa1eecc 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py @@ -117,7 +117,7 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): def test_process_log_data_primitive(self): primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"] - expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("calf")] + expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")] for index, primitive in enumerate(primitives): log = self.generate_test_log_data(log_body=primitive, count=1) From 726a9a85a657788ec02c4a7a9524cb2f9aa89b25 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 21:44:12 +0000 Subject: [PATCH 41/52] linting fix --- .../aws/logs/test_aws_cw_otlp_batch_log_record_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py index adfa1eecc..2d019bce7 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py @@ -116,8 +116,8 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self): def test_process_log_data_primitive(self): - primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"] - expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")] + primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "calfé"] + expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("calf")] for index, primitive in enumerate(primitives): log = self.generate_test_log_data(log_body=primitive, count=1) From fc7712312984059f73509d990988b3f001cd36bb Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 21:49:14 +0000 Subject: [PATCH 42/52] linting fix --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 2 ++ .../distro/test_aws_opentelementry_configurator.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 2710996bc..16fb06132 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -23,6 +23,8 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder + +# pylint: disable=line-too-long from 
amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( AwsCloudWatchOtlpBatchLogRecordProcessor, ) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 57332c40b..98d6a12fd 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -47,6 +47,8 @@ from amazon.opentelemetry.distro.aws_span_metrics_processor import AwsSpanMetricsProcessor from amazon.opentelemetry.distro.exporter.aws.metrics.aws_cloudwatch_emf_exporter import AwsCloudWatchEmfExporter from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession + +# pylint: disable=line-too-long from amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( AwsCloudWatchOtlpBatchLogRecordProcessor, ) @@ -1016,7 +1018,7 @@ def test_validate_and_fetch_logs_header(self): @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_agent_observability_enabled") @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._is_aws_otlp_endpoint") def test_customize_log_record_processor_with_agent_observability(self, mock_is_aws_endpoint, mock_is_agent_enabled): - """Test that AwsCloudWatchOtlpBatchLogRecordProcessor is used when agent observability is enabled and endpoint is logs endpoint""" + """Test that AwsCloudWatchOtlpBatchLogRecordProcessor is used when agent observability is enabled""" # Mock the OTLPAwsLogExporter mock_exporter = MagicMock(spec=OTLPAwsLogExporter) From f571ffb9dce04059272d30daeb4f93dfc9708242 Mon Sep 17 00:00:00 2001 From: liustve Date: Wed, 2 Jul 2025 22:03:48 +0000 Subject: [PATCH 43/52] linting fix --- .../distro/test_aws_opentelementry_configurator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 98d6a12fd..66e168a57 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -1019,14 +1019,12 @@ def test_validate_and_fetch_logs_header(self): @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._is_aws_otlp_endpoint") def test_customize_log_record_processor_with_agent_observability(self, mock_is_aws_endpoint, mock_is_agent_enabled): """Test that AwsCloudWatchOtlpBatchLogRecordProcessor is used when agent observability is enabled""" - # Mock the OTLPAwsLogExporter mock_exporter = MagicMock(spec=OTLPAwsLogExporter) - - # Test case 1: Agent observability enabled and AWS logs endpoint mock_is_agent_enabled.return_value = True mock_is_aws_endpoint.return_value = True processor = _customize_log_record_processor(mock_exporter) + self.assertIsInstance(processor, AwsCloudWatchOtlpBatchLogRecordProcessor) @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._validate_and_fetch_logs_header") From d7575b85c25d5fb9a2f643adfca9bd7f40315e2f Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 15:47:16 +0000 Subject: [PATCH 44/52] fixing consolidation changes --- 
.../distro/aws_opentelemetry_configurator.py | 61 ++++++----- .../aws/metrics/_cloudwatch_log_client.py | 2 +- .../otlp/aws/logs/otlp_aws_logs_exporter.py | 3 +- .../otlp/aws/traces/otlp_aws_span_exporter.py | 2 +- .../aws/logs/test_otlp_aws_logs_exporter.py | 3 +- .../aws/traces/test_otlp_aws_span_exporter.py | 23 ++-- .../test_aws_opentelementry_configurator.py | 103 ++++++++++++++++-- .../amazon/opentelemetry/distro/test_utils.py | 22 ++++ 8 files changed, 172 insertions(+), 47 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 93d9e48eb..f2014450a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -12,7 +12,7 @@ from amazon.opentelemetry.distro._aws_attribute_keys import AWS_LOCAL_SERVICE, AWS_SERVICE_TYPE from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute -from amazon.opentelemetry.distro._utils import IS_BOTOCORE_INSTALLED, get_aws_session, is_agent_observability_enabled +from amazon.opentelemetry.distro._utils import get_aws_session, is_agent_observability_enabled from amazon.opentelemetry.distro.always_record_sampler import AlwaysRecordSampler from amazon.opentelemetry.distro.attribute_propagating_span_processor_builder import ( AttributePropagatingSpanProcessorBuilder, @@ -23,8 +23,6 @@ AwsMetricAttributesSpanExporterBuilder, ) from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder -from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter -from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler from amazon.opentelemetry.distro.scope_based_exporter import ScopeBasedPeriodicExportingMetricReader @@ -210,9 +208,9 @@ def _init_logging( for _, exporter_class in exporters.items(): exporter_args = {} - log_exporter: LogExporter = _customize_logs_exporter(exporter_class(**exporter_args)) - log_processor = _customize_log_record_processor(log_exporter) - provider.add_log_record_processor(log_processor) + _customize_log_record_processor( + logger_provider=provider, log_exporter=_customize_logs_exporter(exporter_class(**exporter_args)) + ) event_logger_provider = EventLoggerProvider(logger_provider=provider) set_event_logger_provider(event_logger_provider) @@ -299,10 +297,12 @@ def _export_unsampled_span_for_agent_observability(trace_provider: TracerProvide return traces_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_TRACES_ENDPOINT) + if traces_endpoint and _is_aws_otlp_endpoint(traces_endpoint): + endpoint = traces_endpoint.lower() + region = endpoint.split(".")[1] - span_exporter = _create_aws_exporter(endpoint=traces_endpoint) - - trace_provider.add_span_processor(BatchUnsampledSpanProcessor(span_exporter=span_exporter)) + span_exporter = _create_aws_otlp_exporter(endpoint=endpoint, service="xray", region=region) + trace_provider.add_span_processor(BatchUnsampledSpanProcessor(span_exporter=span_exporter)) def _is_defer_to_workers_enabled(): @@ -397,11 +397,13 @@ def _customize_span_exporter(span_exporter: 
SpanExporter, resource: Resource) -> traces_endpoint = os.environ.get(AWS_XRAY_DAEMON_ADDRESS_CONFIG, "127.0.0.1:2000") span_exporter = OTLPUdpSpanExporter(endpoint=traces_endpoint) - if _is_aws_otlp_endpoint(traces_endpoint, "xray"): + if traces_endpoint and _is_aws_otlp_endpoint(traces_endpoint, "xray"): _logger.info("Detected using AWS OTLP Traces Endpoint.") if isinstance(span_exporter, OTLPSpanExporter): - return _create_aws_exporter(endpoint=traces_endpoint) + endpoint = traces_endpoint.lower() + region = endpoint.split(".")[1] + return _create_aws_otlp_exporter(endpoint=traces_endpoint, service="xray", region=region) else: _logger.warning( @@ -415,24 +417,34 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> return AwsMetricAttributesSpanExporterBuilder(span_exporter, resource).build() -def _customize_log_record_processor(log_exporter: LogExporter): - if isinstance(log_exporter, OTLPAwsLogExporter) and is_agent_observability_enabled(): - return AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=log_exporter) +def _customize_log_record_processor(logger_provider: LoggerProvider, log_exporter: Optional[LogExporter]) -> None: + if not log_exporter: + return - return BatchLogRecordProcessor(exporter=log_exporter) + if is_agent_observability_enabled(): + from amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( + AwsCloudWatchOtlpBatchLogRecordProcessor, + ) + + logger_provider.add_log_record_processor(AwsCloudWatchOtlpBatchLogRecordProcessor(exporter=log_exporter)) + else: + logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter=log_exporter)) def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: logs_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_LOGS_ENDPOINT) - if _is_aws_otlp_endpoint(logs_endpoint, "logs"): + if logs_endpoint and _is_aws_otlp_endpoint(logs_endpoint, "logs"): + _logger.info("Detected using AWS OTLP Logs Endpoint.") if isinstance(log_exporter, OTLPLogExporter) and _validate_and_fetch_logs_header().is_valid: + endpoint = logs_endpoint.lower() + region = endpoint.split(".")[1] # Setting default compression mode to Gzip as this is the behavior in upstream's # collector otlp http exporter: # https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter - return _create_aws_exporter(endpoint=logs_endpoint) + return _create_aws_otlp_exporter(endpoint=logs_endpoint, service="logs", region=region) _logger.warning( "Improper configuration see: please export/set " @@ -594,11 +606,11 @@ def _is_lambda_environment(): def _is_aws_otlp_endpoint(otlp_endpoint: Optional[str] = None, service: str = "xray") -> bool: """Is the given endpoint an AWS OTLP endpoint?""" - pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == "xray" else AWS_LOGS_OTLP_ENDPOINT_PATTERN - if not otlp_endpoint: return False + pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == "xray" else AWS_LOGS_OTLP_ENDPOINT_PATTERN + return bool(re.match(pattern, otlp_endpoint.lower())) @@ -787,7 +799,7 @@ def create_emf_exporter(): return None -def _create_aws_exporter(endpoint: str): +def _create_aws_otlp_exporter(endpoint: str, service: str, region: str): """Create and configure the AWS OTLP exporters.""" try: session = get_aws_session() @@ -800,12 +812,7 @@ def _create_aws_exporter(endpoint: str): from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from 
amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter - endpoint = endpoint.lower() - split = endpoint.split(".") - service = split[0] - region = split[1] - - if "xray" in service: + if service == "xray": if is_agent_observability_enabled(): # Span exporter needs an instance of logger provider in ai agent # observability case because we need to split input/output prompts @@ -818,7 +825,7 @@ def _create_aws_exporter(endpoint: str): return OTLPAwsSpanExporter(session=session, endpoint=endpoint, aws_region=region) - if "logs" in service: + if service == "logs": return OTLPAwsLogExporter(session=session, aws_region=region) # pylint: disable=broad-exception-caught diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py index a62b34c43..b7daac12b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/aws/metrics/_cloudwatch_log_client.py @@ -90,7 +90,7 @@ class CloudWatchLogClient: def __init__( self, log_group_name: str, - session: Session = Session(), + session: Session, log_stream_name: Optional[str] = None, aws_region: Optional[str] = None, **kwargs, diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py index 5d41bbfba..4ed3649c3 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/otlp_aws_logs_exporter.py @@ -10,6 +10,7 @@ from time import time from typing import Dict, Optional, Sequence +from botocore.session import Session from requests import Response from requests.exceptions import ConnectionError as RequestsConnectionError from requests.structures import CaseInsensitiveDict @@ -39,7 +40,7 @@ class OTLPAwsLogExporter(OTLPLogExporter): def __init__( self, aws_region: str, - session: Session = Session(), + session: Session, log_group: Optional[str] = None, log_stream: Optional[str] = None, endpoint: Optional[str] = None, diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py index a47351417..3589121d9 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/traces/otlp_aws_span_exporter.py @@ -31,7 +31,7 @@ class OTLPAwsSpanExporter(OTLPSpanExporter): def __init__( self, aws_region: str, - session: Session = Session(), + session: Session, endpoint: Optional[str] = None, certificate_file: Optional[str] = None, client_key_file: Optional[str] = None, diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index a12341dba..53da03738 100644 --- 
a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -7,6 +7,7 @@ import requests from requests.structures import CaseInsensitiveDict +from amazon.opentelemetry.distro._utils import get_aws_session from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import _MAX_RETRYS, OTLPAwsLogExporter from opentelemetry._logs.severity import SeverityNumber from opentelemetry.sdk._logs import LogData, LogRecord @@ -36,7 +37,7 @@ class TestOTLPAwsLogsExporter(TestCase): def setUp(self): self.logs = self.generate_test_log_data() - self.exporter = OTLPAwsLogExporter(aws_region="us-east-1", endpoint=self._ENDPOINT) + self.exporter = OTLPAwsLogExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=self._ENDPOINT) @patch("requests.Session.post", return_value=good_response) def test_export_success(self, mock_request): diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py index 63bd1c227..1553dd8e2 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/traces/test_otlp_aws_span_exporter.py @@ -4,6 +4,7 @@ from unittest import TestCase from unittest.mock import MagicMock, patch +from amazon.opentelemetry.distro._utils import get_aws_session from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk._logs import LoggerProvider @@ -17,7 +18,9 @@ def test_init_with_logger_provider(self): mock_logger_provider = MagicMock(spec=LoggerProvider) endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint, logger_provider=mock_logger_provider) + exporter = OTLPAwsSpanExporter( + session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint, logger_provider=mock_logger_provider + ) self.assertEqual(exporter._logger_provider, mock_logger_provider) self.assertEqual(exporter._aws_region, "us-east-1") @@ -26,7 +29,7 @@ def test_init_without_logger_provider(self): # Test initialization without logger_provider (default behavior) endpoint = "https://xray.us-west-2.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-west-2", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-west-2", endpoint=endpoint) self.assertIsNone(exporter._logger_provider) self.assertEqual(exporter._aws_region, "us-west-2") @@ -38,7 +41,7 @@ def test_ensure_llo_handler_when_disabled(self, mock_is_enabled): mock_is_enabled.return_value = False endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint) result = exporter._ensure_llo_handler() self.assertFalse(result) @@ -59,7 +62,7 @@ def test_ensure_llo_handler_lazy_initialization( mock_llo_handler_class.return_value = mock_llo_handler endpoint = 
"https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint) # First call should initialize result = exporter._ensure_llo_handler() @@ -87,7 +90,9 @@ def test_ensure_llo_handler_with_existing_logger_provider(self, mock_is_enabled, mock_logger_provider = MagicMock(spec=LoggerProvider) endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint, logger_provider=mock_logger_provider) + exporter = OTLPAwsSpanExporter( + session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint, logger_provider=mock_logger_provider + ) with patch( "amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter.LLOHandler" @@ -110,7 +115,7 @@ def test_ensure_llo_handler_get_logger_provider_fails(self, mock_is_enabled, moc mock_get_logger_provider.side_effect = Exception("Failed to get logger provider") endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint) result = exporter._ensure_llo_handler() @@ -123,7 +128,7 @@ def test_export_with_llo_disabled(self, mock_is_enabled): mock_is_enabled.return_value = False endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint) # Mock the parent class export method with patch.object(OTLPSpanExporter, "export") as mock_parent_export: @@ -149,7 +154,7 @@ def test_export_with_llo_enabled(self, mock_llo_handler_class, mock_get_logger_p mock_llo_handler_class.return_value = mock_llo_handler endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint) # Mock spans and processed spans original_spans = [MagicMock(spec=ReadableSpan), MagicMock(spec=ReadableSpan)] @@ -182,7 +187,7 @@ def test_export_with_llo_processing_failure( mock_llo_handler.process_spans.side_effect = Exception("LLO processing failed") endpoint = "https://xray.us-east-1.amazonaws.com/v1/traces" - exporter = OTLPAwsSpanExporter(aws_region="us-east-1", endpoint=endpoint) + exporter = OTLPAwsSpanExporter(session=get_aws_session(), aws_region="us-east-1", endpoint=endpoint) spans = [MagicMock(spec=ReadableSpan), MagicMock(spec=ReadableSpan)] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 9eaddc407..14cb9f824 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -25,6 +25,7 @@ AwsOpenTelemetryConfigurator, OtlpLogHeaderSetting, _check_emf_exporter_enabled, + _create_aws_otlp_exporter, _custom_import_sampler, _customize_log_record_processor, _customize_logs_exporter, @@ -69,6 +70,7 @@ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.metrics 
import get_meter_provider from opentelemetry.processor.baggage import BaggageSpanProcessor +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor from opentelemetry.sdk.environment_variables import OTEL_TRACES_SAMPLER, OTEL_TRACES_SAMPLER_ARG from opentelemetry.sdk.metrics._internal.export import PeriodicExportingMetricReader from opentelemetry.sdk.resources import Resource @@ -386,6 +388,7 @@ def test_customize_span_processors_with_agent_observability(self): mock_tracer_provider.reset_mock() os.environ["AGENT_OBSERVABILITY_ENABLED"] = "true" + os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = "https://xray.us-east-1.amazonaws.com/v1/traces" _customize_span_processors(mock_tracer_provider, Resource.get_empty()) self.assertEqual(mock_tracer_provider.add_span_processor.call_count, 2) @@ -395,6 +398,7 @@ def test_customize_span_processors_with_agent_observability(self): self.assertIsInstance(second_processor, BaggageSpanProcessor) os.environ.pop("AGENT_OBSERVABILITY_ENABLED", None) + os.environ.pop("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", None) def test_baggage_span_processor_session_id_filtering(self): """Test that BaggageSpanProcessor only set session.id filter by default""" @@ -695,6 +699,7 @@ def test_customize_span_processors(self): mock_tracer_provider.reset_mock() os.environ.setdefault("AGENT_OBSERVABILITY_ENABLED", "true") + os.environ.setdefault("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "https://xray.us-east-1.amazonaws.com/v1/traces") _customize_span_processors(mock_tracer_provider, Resource.get_empty()) self.assertEqual(mock_tracer_provider.add_span_processor.call_count, 4) @@ -704,6 +709,8 @@ def test_customize_span_processors(self): self.assertIsInstance(processors[2], AttributePropagatingSpanProcessor) self.assertIsInstance(processors[3], AwsSpanMetricsProcessor) + os.environ.pop("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") + def test_customize_span_processors_lambda(self): mock_tracer_provider: TracerProvider = MagicMock() # Clean up environment to ensure consistent test state @@ -1016,17 +1023,33 @@ def test_validate_and_fetch_logs_header(self): # Clean up os.environ.pop(OTEL_EXPORTER_OTLP_LOGS_HEADERS, None) - @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_agent_observability_enabled") - @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._is_aws_otlp_endpoint") - def test_customize_log_record_processor_with_agent_observability(self, mock_is_aws_endpoint, mock_is_agent_enabled): + @patch( + "amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_agent_observability_enabled", return_value=False + ) + def test_customize_log_record_processor_without_agent_observability(self, _): + """Test that BatchLogRecordProcessor is used when agent observability is not enabled""" + mock_logger_provider = MagicMock() + mock_exporter = MagicMock(spec=OTLPAwsLogExporter) + + _customize_log_record_processor(mock_logger_provider, mock_exporter) + + mock_logger_provider.add_log_record_processor.assert_called_once() + added_processor = mock_logger_provider.add_log_record_processor.call_args[0][0] + self.assertIsInstance(added_processor, BatchLogRecordProcessor) + + @patch( + "amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_agent_observability_enabled", return_value=True + ) + def test_customize_log_record_processor_with_agent_observability(self, _): """Test that AwsCloudWatchOtlpBatchLogRecordProcessor is used when agent observability is enabled""" + mock_logger_provider = MagicMock() mock_exporter = MagicMock(spec=OTLPAwsLogExporter) - 
mock_is_agent_enabled.return_value = True - mock_is_aws_endpoint.return_value = True - processor = _customize_log_record_processor(mock_exporter) + _customize_log_record_processor(mock_logger_provider, mock_exporter) - self.assertIsInstance(processor, AwsCloudWatchOtlpBatchLogRecordProcessor) + mock_logger_provider.add_log_record_processor.assert_called_once() + added_processor = mock_logger_provider.add_log_record_processor.call_args[0][0] + self.assertIsInstance(added_processor, AwsCloudWatchOtlpBatchLogRecordProcessor) @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator._validate_and_fetch_logs_header") @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_aws_session") @@ -1085,6 +1108,72 @@ def test_create_emf_exporter(self, mock_get_session, mock_validate): result = create_emf_exporter() self.assertIsNone(result) + @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_logger_provider") + @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.is_agent_observability_enabled") + @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_aws_session") + def test_create_aws_otlp_exporter(self, mock_get_session, mock_is_agent_enabled, mock_get_logger_provider): + # Test when botocore is not installed + mock_get_session.return_value = None + result = _create_aws_otlp_exporter("https://xray.us-east-1.amazonaws.com/v1/traces", "xray", "us-east-1") + self.assertIsNone(result) + + # Reset mock for subsequent tests + mock_get_session.reset_mock() + mock_get_session.return_value = MagicMock() + mock_get_logger_provider.return_value = MagicMock() + + # Test xray service without agent observability + mock_is_agent_enabled.return_value = False + with patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter.OTLPAwsSpanExporter" + ) as mock_span_exporter_class: + mock_exporter_instance = MagicMock() + mock_span_exporter_class.return_value = mock_exporter_instance + + result = _create_aws_otlp_exporter("https://xray.us-east-1.amazonaws.com/v1/traces", "xray", "us-east-1") + self.assertIsNotNone(result) + self.assertEqual(result, mock_exporter_instance) + mock_span_exporter_class.assert_called_with( + session=mock_get_session.return_value, + endpoint="https://xray.us-east-1.amazonaws.com/v1/traces", + aws_region="us-east-1", + ) + + # Test xray service with agent observability + mock_is_agent_enabled.return_value = True + with patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter.OTLPAwsSpanExporter" + ) as mock_span_exporter_class: + mock_exporter_instance = MagicMock() + mock_span_exporter_class.return_value = mock_exporter_instance + + result = _create_aws_otlp_exporter("https://xray.us-east-1.amazonaws.com/v1/traces", "xray", "us-east-1") + self.assertIsNotNone(result) + self.assertEqual(result, mock_exporter_instance) + mock_span_exporter_class.assert_called_with( + session=mock_get_session.return_value, + endpoint="https://xray.us-east-1.amazonaws.com/v1/traces", + aws_region="us-east-1", + logger_provider=mock_get_logger_provider.return_value, + ) + + # Test logs service + with patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter.OTLPAwsLogExporter" + ) as mock_log_exporter_class: + mock_exporter_instance = MagicMock() + mock_log_exporter_class.return_value = mock_exporter_instance + + result = _create_aws_otlp_exporter("https://logs.us-east-1.amazonaws.com/v1/logs", "logs", "us-east-1") + self.assertIsNotNone(result) + 
self.assertEqual(result, mock_exporter_instance) + mock_log_exporter_class.assert_called_with(session=mock_get_session.return_value, aws_region="us-east-1") + + # Test exception handling + mock_get_session.side_effect = Exception("Test exception") + result = _create_aws_otlp_exporter("https://xray.us-east-1.amazonaws.com/v1/traces", "xray", "us-east-1") + self.assertIsNone(result) + def test_customize_metric_exporters_with_emf(self): metric_readers = [] views = [] diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py index 1079feb4a..4c0cd709f 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_utils.py @@ -151,3 +151,25 @@ def test_get_aws_region_botocore_no_region(self): region = get_aws_region() self.assertIsNone(region) mock_session.get_config_variable.assert_called_once_with("region") + + def test_get_aws_region_with_aws_region_env(self): + """Test get_aws_region when AWS_REGION environment variable is set""" + os.environ.pop("AWS_REGION", None) + os.environ.pop("AWS_DEFAULT_REGION", None) + os.environ["AWS_REGION"] = "us-west-2" + + region = get_aws_region() + self.assertEqual(region, "us-west-2") + + os.environ.pop("AWS_REGION", None) + + def test_get_aws_region_with_aws_default_region_env(self): + """Test get_aws_region when AWS_DEFAULT_REGION environment variable is set""" + os.environ.pop("AWS_REGION", None) + os.environ.pop("AWS_DEFAULT_REGION", None) + os.environ["AWS_DEFAULT_REGION"] = "eu-west-1" + + region = get_aws_region() + self.assertEqual(region, "eu-west-1") + + os.environ.pop("AWS_DEFAULT_REGION", None) From 17d4cb9f35c960d4b46bedb2b0c55badf4ec463a Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 15:52:29 +0000 Subject: [PATCH 45/52] add comments --- .../src/amazon/opentelemetry/distro/_utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index 42e4494a2..25c60d14a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -42,11 +42,18 @@ def is_agent_observability_enabled() -> bool: def get_aws_session(): + """Returns a botocore session only if botocore is installed, otherwise None. + + We do this to prevent runtime errors for ADOT customers that do not need + any features that require botocore. + """ if IS_BOTOCORE_INSTALLED: # pylint: disable=import-outside-toplevel from botocore.session import Session session = Session() + # Botocore only looks up AWS_DEFAULT_REGION when creating a session/client + # See: https://docs.aws.amazon.com/sdkref/latest/guide/feature-region.html#feature-region-sdk-compat region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") if region: session.set_config_variable("region", region) @@ -55,5 +62,13 @@ def get_aws_session(): def get_aws_region() -> Optional[str]: + """Get AWS region from environment or botocore session. + + Returns the AWS region in the following priority order: + 1. AWS_REGION environment variable + 2. AWS_DEFAULT_REGION environment variable + 3. botocore session's region (if botocore is available) + 4. 
None if no region can be determined + """ botocore_session = get_aws_session() return botocore_session.get_config_variable("region") if botocore_session else None From e8a1ec966ce977660253f551b27993e910f54097 Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 16:05:05 +0000 Subject: [PATCH 46/52] linting fix --- .../distro/aws_opentelemetry_configurator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index f2014450a..5816f3458 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -405,11 +405,10 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> region = endpoint.split(".")[1] return _create_aws_otlp_exporter(endpoint=traces_endpoint, service="xray", region=region) - else: - _logger.warning( - "Improper configuration see: please export/set " - "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf and OTEL_TRACES_EXPORTER=otlp" - ) + _logger.warning( + "Improper configuration see: please export/set " + "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf and OTEL_TRACES_EXPORTER=otlp" + ) if not _is_application_signals_enabled(): return span_exporter @@ -828,6 +827,7 @@ def _create_aws_otlp_exporter(endpoint: str, service: str, region: str): if service == "logs": return OTLPAwsLogExporter(session=session, aws_region=region) + return None # pylint: disable=broad-exception-caught except Exception as errors: _logger.error("Failed to create AWS OTLP exporter: %s", errors) From 9f8418398a0f0a18044a0697cd7ef29d8eec4365 Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 16:18:57 +0000 Subject: [PATCH 47/52] update tests --- .../otlp/aws/common/aws_auth_session.py | 2 +- .../otlp/aws/common/test_aws_auth_session.py | 5 ++-- .../aws/logs/test_otlp_aws_logs_exporter.py | 27 ++++++++++++++++++- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py index c945727c5..564bfe9e2 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py @@ -34,7 +34,7 @@ class AwsAuthSession(requests.Session): service (str): The AWS service name for signing (e.g., "logs" or "xray") """ - def __init__(self, aws_region: str, service: str, session: Session = Session()): + def __init__(self, aws_region: str, service: str, session: Session): self._aws_region: str = aws_region self._service: str = service self._session: Session = session diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index f69480e23..11babbb7b 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -6,6 +6,7 @@ import 
requests from botocore.credentials import Credentials +from amazon.opentelemetry.distro._utils import get_aws_session from amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session import AwsAuthSession AWS_OTLP_TRACES_ENDPOINT = "https://xray.us-east-1.amazonaws.com/v1/traces" @@ -24,7 +25,7 @@ class TestAwsAuthSession(TestCase): def test_aws_auth_session_no_credentials(self, _, __): """Tests that aws_auth_session will not inject SigV4 Headers if retrieving credentials returns None.""" - session = AwsAuthSession("us-east-1", "xray") + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) actual_headers = {"test": "test"} session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) @@ -38,7 +39,7 @@ def test_aws_auth_session_no_credentials(self, _, __): def test_aws_auth_session(self, _, __): """Tests that aws_auth_session will inject SigV4 Headers if botocore is installed.""" - session = AwsAuthSession("us-east-1", "xray") + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) actual_headers = {"test": "test"} session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py index 53da03738..5c75f63de 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_otlp_aws_logs_exporter.py @@ -201,7 +201,32 @@ def test_export_interrupted_by_shutdown(self, mock_request, mock_wait): # Should make one request, then get interrupted during retry wait self.assertEqual(mock_request.call_count, 1) self.assertEqual(result, LogExportResult.FAILURE) - mock_wait.assert_called_once() + + @patch("requests.Session.post", return_value=good_response) + def test_export_with_log_group_and_stream_headers(self, mock_request): + """Tests that log_group and log_stream are properly set as headers when provided.""" + log_group = "test-log-group" + log_stream = "test-log-stream" + + exporter = OTLPAwsLogExporter( + session=get_aws_session(), + aws_region="us-east-1", + endpoint=self._ENDPOINT, + log_group=log_group, + log_stream=log_stream, + ) + + result = exporter.export(self.logs) + + mock_request.assert_called_once() + self.assertEqual(result, LogExportResult.SUCCESS) + + # Verify headers contain log group and stream + session_headers = exporter._session.headers + self.assertIn("x-aws-log-group", session_headers) + self.assertIn("x-aws-log-stream", session_headers) + self.assertEqual(session_headers["x-aws-log-group"], log_group) + self.assertEqual(session_headers["x-aws-log-stream"], log_stream) @staticmethod def generate_test_log_data(count=5): From fb7f6c85c033b26ad5ebd1b01957980d0cea91de Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 17:04:09 +0000 Subject: [PATCH 48/52] linting fix --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 5816f3458..e8997885b 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ 
b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -421,6 +421,7 @@ def _customize_log_record_processor(logger_provider: LoggerProvider, log_exporte return if is_agent_observability_enabled(): + # pylint: disable=import-outside-toplevel from amazon.opentelemetry.distro.exporter.otlp.aws.logs._aws_cw_otlp_batch_log_record_processor import ( AwsCloudWatchOtlpBatchLogRecordProcessor, ) From 22a1bd1854930711016507cf7eb77392ffd0f55c Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 19:55:36 +0000 Subject: [PATCH 49/52] addressed PR comments --- .../src/amazon/opentelemetry/distro/_utils.py | 5 +- .../distro/aws_opentelemetry_configurator.py | 51 ++++++++++--------- .../test_aws_opentelementry_configurator.py | 18 ++++--- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index 25c60d14a..129a4c0e2 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -42,7 +42,10 @@ def is_agent_observability_enabled() -> bool: def get_aws_session(): - """Returns a botocore session only if botocore is installed, otherwise None. + """ + Returns a botocore session only if botocore is installed, otherwise None. + If AWS Region is defined in `AWS_REGION` or `AWS_DEFAULT_REGION` environment variables, + then the region is set in the botocore session before returning. We do this to prevent runtime errors for ADOT customers that do not need any features that require botocore. diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index e8997885b..384600388 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -95,6 +95,8 @@ OTEL_EXPORTER_OTLP_LOGS_ENDPOINT = "OTEL_EXPORTER_OTLP_LOGS_ENDPOINT" OTEL_EXPORTER_OTLP_LOGS_HEADERS = "OTEL_EXPORTER_OTLP_LOGS_HEADERS" +XRAY_SERVICE = "xray" +LOGS_SERIVCE = "logs" AWS_TRACES_OTLP_ENDPOINT_PATTERN = r"https://xray\.([a-z0-9-]+)\.amazonaws\.com/v1/traces$" AWS_LOGS_OTLP_ENDPOINT_PATTERN = r"https://logs\.([a-z0-9-]+)\.amazonaws\.com/v1/logs$" @@ -297,11 +299,10 @@ def _export_unsampled_span_for_agent_observability(trace_provider: TracerProvide return traces_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_TRACES_ENDPOINT) - if traces_endpoint and _is_aws_otlp_endpoint(traces_endpoint): - endpoint = traces_endpoint.lower() - region = endpoint.split(".")[1] + if traces_endpoint and _is_aws_otlp_endpoint(traces_endpoint, XRAY_SERVICE): + endpoint, region = _extract_endpoint_and_region_from_otlp_endpoint(traces_endpoint) + span_exporter = _create_aws_otlp_exporter(endpoint=endpoint, service=XRAY_SERVICE, region=region) - span_exporter = _create_aws_otlp_exporter(endpoint=endpoint, service="xray", region=region) trace_provider.add_span_processor(BatchUnsampledSpanProcessor(span_exporter=span_exporter)) @@ -351,7 +352,7 @@ def _custom_import_sampler(sampler_name: str, resource: Resource) -> Sampler: if sampler_name is None: sampler_name = "parentbased_always_on" - if sampler_name == "xray": + if sampler_name == XRAY_SERVICE: # Example env var value # 
OTEL_TRACES_SAMPLER_ARG=endpoint=http://localhost:2000,polling_interval=360 sampler_argument_env: str = os.getenv(OTEL_TRACES_SAMPLER_ARG, None) @@ -397,17 +398,16 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> traces_endpoint = os.environ.get(AWS_XRAY_DAEMON_ADDRESS_CONFIG, "127.0.0.1:2000") span_exporter = OTLPUdpSpanExporter(endpoint=traces_endpoint) - if traces_endpoint and _is_aws_otlp_endpoint(traces_endpoint, "xray"): + if traces_endpoint and _is_aws_otlp_endpoint(traces_endpoint, XRAY_SERVICE): _logger.info("Detected using AWS OTLP Traces Endpoint.") if isinstance(span_exporter, OTLPSpanExporter): - endpoint = traces_endpoint.lower() - region = endpoint.split(".")[1] - return _create_aws_otlp_exporter(endpoint=traces_endpoint, service="xray", region=region) + endpoint, region = _extract_endpoint_and_region_from_otlp_endpoint(traces_endpoint) + return _create_aws_otlp_exporter(endpoint=endpoint, service=XRAY_SERVICE, region=region) _logger.warning( "Improper configuration see: please export/set " - "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf and OTEL_TRACES_EXPORTER=otlp" + "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf ƒnd OTEL_TRACES_EXPORTER=otlp" ) if not _is_application_signals_enabled(): @@ -439,12 +439,11 @@ def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: _logger.info("Detected using AWS OTLP Logs Endpoint.") if isinstance(log_exporter, OTLPLogExporter) and _validate_and_fetch_logs_header().is_valid: - endpoint = logs_endpoint.lower() - region = endpoint.split(".")[1] + endpoint, region = _extract_endpoint_and_region_from_otlp_endpoint(logs_endpoint) # Setting default compression mode to Gzip as this is the behavior in upstream's # collector otlp http exporter: # https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter - return _create_aws_otlp_exporter(endpoint=logs_endpoint, service="logs", region=region) + return _create_aws_otlp_exporter(endpoint=endpoint, service="logs", region=region) _logger.warning( "Improper configuration see: please export/set " @@ -513,7 +512,7 @@ def _customize_metric_exporters( metric_readers.append(scope_based_periodic_exporting_metric_reader) if is_emf_enabled: - emf_exporter = create_emf_exporter() + emf_exporter = _create_emf_exporter() if emf_exporter: metric_readers.append(PeriodicExportingMetricReader(emf_exporter)) @@ -603,17 +602,24 @@ def _is_lambda_environment(): return AWS_LAMBDA_FUNCTION_NAME_CONFIG in os.environ -def _is_aws_otlp_endpoint(otlp_endpoint: Optional[str] = None, service: str = "xray") -> bool: +def _is_aws_otlp_endpoint(otlp_endpoint: Optional[str], service: str) -> bool: """Is the given endpoint an AWS OTLP endpoint?""" if not otlp_endpoint: return False - pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == "xray" else AWS_LOGS_OTLP_ENDPOINT_PATTERN + pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == XRAY_SERVICE else AWS_LOGS_OTLP_ENDPOINT_PATTERN return bool(re.match(pattern, otlp_endpoint.lower())) +def _extract_endpoint_and_region_from_otlp_endpoint(endpoint: str): + endpoint = endpoint.lower() + region = endpoint.split(".")[1] + + return endpoint, region + + def _validate_and_fetch_logs_header() -> OtlpLogHeaderSetting: """Checks if x-aws-log-group and x-aws-log-stream are present in the headers in order to send logs to AWS OTLP Logs endpoint.""" @@ -630,7 +636,6 @@ def _validate_and_fetch_logs_header() -> OtlpLogHeaderSetting: log_group = None log_stream = None namespace = None - 
filtered_log_headers_count = 0 for pair in logs_headers.split(","): if "=" in pair: @@ -639,14 +644,12 @@ def _validate_and_fetch_logs_header() -> OtlpLogHeaderSetting: value = split[1] if key == AWS_OTLP_LOGS_GROUP_HEADER and value: log_group = value - filtered_log_headers_count += 1 elif key == AWS_OTLP_LOGS_STREAM_HEADER and value: log_stream = value - filtered_log_headers_count += 1 elif key == AWS_EMF_METRICS_NAMESPACE and value: namespace = value - is_valid = filtered_log_headers_count == 2 and log_group is not None and log_stream is not None + is_valid = log_group is not None and log_stream is not None if not is_valid: _logger.warning( @@ -768,7 +771,7 @@ def _check_emf_exporter_enabled() -> bool: return True -def create_emf_exporter(): +def _create_emf_exporter(): """Create and configure the CloudWatch EMF exporter.""" try: session = get_aws_session() @@ -805,14 +808,14 @@ def _create_aws_otlp_exporter(endpoint: str, service: str, region: str): session = get_aws_session() # Check if botocore is available before importing the AWS exporter if not session: - _logger.warning("SigV4 Auth requires botocore to be enabled") + _logger.warning("Sigv4 Auth requires botocore to be enabled") return None # pylint: disable=import-outside-toplevel from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter from amazon.opentelemetry.distro.exporter.otlp.aws.traces.otlp_aws_span_exporter import OTLPAwsSpanExporter - if service == "xray": + if service == XRAY_SERVICE: if is_agent_observability_enabled(): # Span exporter needs an instance of logger provider in ai agent # observability case because we need to split input/output prompts @@ -825,7 +828,7 @@ def _create_aws_otlp_exporter(endpoint: str, service: str, region: str): return OTLPAwsSpanExporter(session=session, endpoint=endpoint, aws_region=region) - if service == "logs": + if service == LOGS_SERIVCE: return OTLPAwsLogExporter(session=session, aws_region=region) return None diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py index 14cb9f824..2a01bbd5c 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_opentelementry_configurator.py @@ -26,6 +26,7 @@ OtlpLogHeaderSetting, _check_emf_exporter_enabled, _create_aws_otlp_exporter, + _create_emf_exporter, _custom_import_sampler, _customize_log_record_processor, _customize_logs_exporter, @@ -42,7 +43,6 @@ _is_defer_to_workers_enabled, _is_wsgi_master_process, _validate_and_fetch_logs_header, - create_emf_exporter, ) from amazon.opentelemetry.distro.aws_opentelemetry_distro import AwsOpenTelemetryDistro from amazon.opentelemetry.distro.aws_span_metrics_processor import AwsSpanMetricsProcessor @@ -1056,7 +1056,7 @@ def test_customize_log_record_processor_with_agent_observability(self, _): def test_create_emf_exporter(self, mock_get_session, mock_validate): # Test when botocore is not installed mock_get_session.return_value = None - result = create_emf_exporter() + result = _create_emf_exporter() self.assertIsNone(result) # Reset mock for subsequent tests @@ -1074,12 +1074,12 @@ def test_create_emf_exporter(self, mock_get_session, mock_validate): # Test when headers are invalid mock_validate.return_value = OtlpLogHeaderSetting(None, None, None, False) - result = 
create_emf_exporter() + result = _create_emf_exporter() self.assertIsNone(result) # Test when namespace is missing (should still create exporter with default namespace) mock_validate.return_value = OtlpLogHeaderSetting("test-group", "test-stream", None, True) - result = create_emf_exporter() + result = _create_emf_exporter() self.assertIsNotNone(result) self.assertEqual(result, mock_exporter_instance) # Verify that the EMF exporter was called with correct parameters @@ -1092,7 +1092,7 @@ def test_create_emf_exporter(self, mock_get_session, mock_validate): # Test with valid configuration mock_validate.return_value = OtlpLogHeaderSetting("test-group", "test-stream", "test-namespace", True) - result = create_emf_exporter() + result = _create_emf_exporter() self.assertIsNotNone(result) self.assertEqual(result, mock_exporter_instance) # Verify that the EMF exporter was called with correct parameters @@ -1105,7 +1105,7 @@ def test_create_emf_exporter(self, mock_get_session, mock_validate): # Test exception handling mock_validate.side_effect = Exception("Test exception") - result = create_emf_exporter() + result = _create_emf_exporter() self.assertIsNone(result) @patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.get_logger_provider") @@ -1183,7 +1183,9 @@ def test_customize_metric_exporters_with_emf(self): self.assertEqual(len(metric_readers), 0) # Test with EMF enabled but create_emf_exporter returns None - with patch("amazon.opentelemetry.distro.aws_opentelemetry_configurator.create_emf_exporter", return_value=None): + with patch( + "amazon.opentelemetry.distro.aws_opentelemetry_configurator._create_emf_exporter", return_value=None + ): _customize_metric_exporters(metric_readers, views, is_emf_enabled=True) self.assertEqual(len(metric_readers), 0) @@ -1194,7 +1196,7 @@ def test_customize_metric_exporters_with_emf(self): mock_emf_exporter._preferred_aggregation = {} with patch( - "amazon.opentelemetry.distro.aws_opentelemetry_configurator.create_emf_exporter", + "amazon.opentelemetry.distro.aws_opentelemetry_configurator._create_emf_exporter", return_value=mock_emf_exporter, ): _customize_metric_exporters(metric_readers, views, is_emf_enabled=True) From 6b50a03af5fea00df5a585f41da964f567a533c9 Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 20:06:21 +0000 Subject: [PATCH 50/52] typo fix --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index 384600388..bdf87ed8c 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -407,7 +407,7 @@ def _customize_span_exporter(span_exporter: SpanExporter, resource: Resource) -> _logger.warning( "Improper configuration see: please export/set " - "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf ƒnd OTEL_TRACES_EXPORTER=otlp" + "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http/protobuf and OTEL_TRACES_EXPORTER=otlp" ) if not _is_application_signals_enabled(): From 39ce39a2896743cdb7392e983568355b39c0e340 Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 20:07:30 +0000 Subject: [PATCH 51/52] point logs to LOGS_SERVICE --- .../opentelemetry/distro/aws_opentelemetry_configurator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py index bdf87ed8c..b36581719 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py @@ -434,7 +434,7 @@ def _customize_log_record_processor(logger_provider: LoggerProvider, log_exporte def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: logs_endpoint = os.environ.get(OTEL_EXPORTER_OTLP_LOGS_ENDPOINT) - if logs_endpoint and _is_aws_otlp_endpoint(logs_endpoint, "logs"): + if logs_endpoint and _is_aws_otlp_endpoint(logs_endpoint, LOGS_SERIVCE): _logger.info("Detected using AWS OTLP Logs Endpoint.") @@ -443,7 +443,7 @@ def _customize_logs_exporter(log_exporter: LogExporter) -> LogExporter: # Setting default compression mode to Gzip as this is the behavior in upstream's # collector otlp http exporter: # https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter - return _create_aws_otlp_exporter(endpoint=endpoint, service="logs", region=region) + return _create_aws_otlp_exporter(endpoint=endpoint, service=LOGS_SERIVCE, region=region) _logger.warning( "Improper configuration see: please export/set " From d1aebbb0fa36be31d1d5612b1bd71784d5d2b558 Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 3 Jul 2025 20:13:29 +0000 Subject: [PATCH 52/52] removed unnecessary comment --- .../otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py index 156f177cb..f22c18492 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_cw_otlp_batch_log_record_processor.py @@ -264,7 +264,6 @@ def test_force_flush_exports_only_one_batch(self, _, __, ___): result = self.processor.force_flush() self.assertTrue(result) - # 45 logs should remain self.assertEqual(len(self.processor._queue), 1) self.mock_exporter.export.assert_called_once()
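
A note on the endpoint handling consolidated in [PATCH 49/52]: `_is_aws_otlp_endpoint` matches the lowercased endpoint against a per-service pattern, and `_extract_endpoint_and_region_from_otlp_endpoint` takes the second dot-separated label as the region. The sketch below is a standalone approximation of that logic, not the shipped code — the regex patterns are copied from the patch, the function names are shortened, and the asserts are illustrative only.

import re

# Patterns copied from aws_opentelemetry_configurator.py in PATCH 49.
AWS_TRACES_OTLP_ENDPOINT_PATTERN = r"https://xray\.([a-z0-9-]+)\.amazonaws\.com/v1/traces$"
AWS_LOGS_OTLP_ENDPOINT_PATTERN = r"https://logs\.([a-z0-9-]+)\.amazonaws\.com/v1/logs$"


def is_aws_otlp_endpoint(endpoint, service):
    # Empty/None endpoints are rejected before any pattern matching.
    if not endpoint:
        return False
    pattern = AWS_TRACES_OTLP_ENDPOINT_PATTERN if service == "xray" else AWS_LOGS_OTLP_ENDPOINT_PATTERN
    return bool(re.match(pattern, endpoint.lower()))


def extract_endpoint_and_region(endpoint):
    # "https://xray.us-east-1.amazonaws.com/v1/traces" -> (lowercased endpoint, "us-east-1")
    endpoint = endpoint.lower()
    return endpoint, endpoint.split(".")[1]


assert is_aws_otlp_endpoint("https://xray.us-east-1.amazonaws.com/v1/traces", "xray")
assert not is_aws_otlp_endpoint("https://example.com/v1/traces", "logs")
assert extract_endpoint_and_region("https://LOGS.EU-WEST-1.amazonaws.com/v1/logs")[1] == "eu-west-1"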
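
The dispatch in `_customize_log_record_processor`, reworked earlier in this series, is small but easy to misread in diff form: nothing is registered when no exporter is configured, and the CloudWatch-aware batch processor is chosen only when agent observability is enabled. A minimal sketch of the same selection follows, using stand-in classes rather than the real SDK and distro types; the enabled flag is passed explicitly here instead of read from the environment as the real code does.

class BatchProcessor:
    """Stand-in for the SDK's BatchLogRecordProcessor."""

    def __init__(self, exporter):
        self.exporter = exporter


class CloudWatchBatchProcessor(BatchProcessor):
    """Stand-in for AwsCloudWatchOtlpBatchLogRecordProcessor."""


class FakeLoggerProvider:
    """Stand-in for the SDK LoggerProvider; records registered processors."""

    def __init__(self):
        self.processors = []

    def add_log_record_processor(self, processor):
        self.processors.append(processor)


def customize_log_record_processor(logger_provider, log_exporter, agent_observability_enabled):
    if not log_exporter:
        return  # no exporter configured -> register nothing
    cls = CloudWatchBatchProcessor if agent_observability_enabled else BatchProcessor
    logger_provider.add_log_record_processor(cls(exporter=log_exporter))


provider = FakeLoggerProvider()
customize_log_record_processor(provider, log_exporter=None, agent_observability_enabled=True)
assert provider.processors == []  # early return on missing exporter
customize_log_record_processor(provider, log_exporter=object(), agent_observability_enabled=True)
assert isinstance(provider.processors[0], CloudWatchBatchProcessor)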
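
[PATCH 49/52] also simplifies `_validate_and_fetch_logs_header`: rather than counting matched headers, the configuration is now valid exactly when both the log-group and log-stream values are present (the EMF namespace header stays optional). Below is a simplified, self-contained sketch of that parse-and-validate step; the header names are taken from the tests in this series, and the real code reads them through module constants.

LOG_GROUP_HEADER = "x-aws-log-group"    # AWS_OTLP_LOGS_GROUP_HEADER in the real module
LOG_STREAM_HEADER = "x-aws-log-stream"  # AWS_OTLP_LOGS_STREAM_HEADER in the real module


def validate_logs_headers(raw_headers):
    """Parse a 'key=value,key=value' string; valid only when group and stream are both set."""
    values = {}
    for pair in (raw_headers or "").split(","):
        if "=" in pair:
            key, _, value = pair.partition("=")
            if value:
                values[key] = value
    log_group = values.get(LOG_GROUP_HEADER)
    log_stream = values.get(LOG_STREAM_HEADER)
    return log_group, log_stream, (log_group is not None and log_stream is not None)


group, stream, ok = validate_logs_headers("x-aws-log-group=my-group,x-aws-log-stream=my-stream")
assert ok and group == "my-group" and stream == "my-stream"
assert validate_logs_headers("x-aws-log-group=my-group")[2] is False  # stream missing -> invalid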
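
Finally, the region-resolution order documented in [PATCH 45/52] — `AWS_REGION`, then `AWS_DEFAULT_REGION`, then the botocore session's configured region, then None — can be demonstrated without botocore installed. The helper below mirrors that priority chain only as a sketch; its `session_region` argument stands in for `session.get_config_variable("region")`.

import os


def resolve_region(session_region=None):
    # Environment variables win over whatever the botocore session would report.
    return os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") or session_region


os.environ.pop("AWS_REGION", None)
os.environ["AWS_DEFAULT_REGION"] = "eu-west-1"
assert resolve_region() == "eu-west-1"

os.environ["AWS_REGION"] = "us-west-2"
assert resolve_region() == "us-west-2"  # AWS_REGION takes precedence
assert resolve_region(session_region="ap-southeast-2") == "us-west-2"

os.environ.pop("AWS_REGION", None)
os.environ.pop("AWS_DEFAULT_REGION", None)
assert resolve_region() is None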