diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py index 71e675cd3..d349b8890 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py @@ -18,8 +18,11 @@ # AWS_#_NAME attributes are not supported in python as they are not part of the Semantic Conventions. # TODO:Move to Semantic Conventions when these attributes are added. +AWS_AUTH_ACCESS_KEY: str = "aws.auth.account.access_key" +AWS_AUTH_REGION: str = "aws.auth.region" AWS_SQS_QUEUE_URL: str = "aws.sqs.queue.url" AWS_SQS_QUEUE_NAME: str = "aws.sqs.queue.name" +AWS_KINESIS_STREAM_ARN: str = "aws.kinesis.stream.arn" AWS_KINESIS_STREAM_NAME: str = "aws.kinesis.stream.name" AWS_BEDROCK_DATA_SOURCE_ID: str = "aws.bedrock.data_source.id" AWS_BEDROCK_KNOWLEDGE_BASE_ID: str = "aws.bedrock.knowledge_base.id" @@ -33,4 +36,8 @@ AWS_LAMBDA_FUNCTION_NAME: str = "aws.lambda.function.name" AWS_LAMBDA_RESOURCEMAPPING_ID: str = "aws.lambda.resource_mapping.id" AWS_LAMBDA_FUNCTION_ARN: str = "aws.lambda.function.arn" +AWS_DYNAMODB_TABLE_ARN: str = "aws.dynamodb.table.arn" +AWS_REMOTE_RESOURCE_ACCESS_KEY: str = "aws.remote.resource.account.access_key" +AWS_REMOTE_RESOURCE_ACCOUNT_ID: str = "aws.remote.resource.account.id" +AWS_REMOTE_RESOURCE_REGION: str = "aws.remote.resource.region" AWS_SERVICE_TYPE: str = "aws.service.type" diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py index 173f8492b..88eedb152 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py @@ -7,12 +7,16 @@ from urllib.parse import ParseResult, urlparse from amazon.opentelemetry.distro._aws_attribute_keys import ( + AWS_AUTH_ACCESS_KEY, + AWS_AUTH_REGION, AWS_BEDROCK_AGENT_ID, AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_GUARDRAIL_ARN, AWS_BEDROCK_GUARDRAIL_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID, AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER, + AWS_DYNAMODB_TABLE_ARN, + AWS_KINESIS_STREAM_ARN, AWS_KINESIS_STREAM_NAME, AWS_LAMBDA_FUNCTION_ARN, AWS_LAMBDA_FUNCTION_NAME, @@ -22,7 +26,10 @@ AWS_REMOTE_DB_USER, AWS_REMOTE_ENVIRONMENT, AWS_REMOTE_OPERATION, + AWS_REMOTE_RESOURCE_ACCESS_KEY, + AWS_REMOTE_RESOURCE_ACCOUNT_ID, AWS_REMOTE_RESOURCE_IDENTIFIER, + AWS_REMOTE_RESOURCE_REGION, AWS_REMOTE_RESOURCE_TYPE, AWS_REMOTE_SERVICE, AWS_SECRETSMANAGER_SECRET_ARN, @@ -56,6 +63,7 @@ SERVICE_METRIC, MetricAttributeGenerator, ) +from amazon.opentelemetry.distro.regional_resource_arn_parser import RegionalResourceArnParser from amazon.opentelemetry.distro.sqs_url_parser import SqsUrlParser from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan @@ -148,7 +156,11 @@ def _generate_dependency_metric_attributes(span: ReadableSpan, resource: Resourc _set_service(resource, span, attributes) _set_egress_operation(span, attributes) _set_remote_service_and_operation(span, attributes) - _set_remote_type_and_identifier(span, attributes) + is_remote_identifier_present = _set_remote_type_and_identifier(span, attributes) + if is_remote_identifier_present: + is_remote_account_id_present = _set_remote_account_id_and_region(span, attributes) + if not is_remote_account_id_present: + _set_remote_access_key_and_region(span, attributes) _set_remote_environment(span, attributes) _set_remote_db_user(span, attributes) _set_span_kind_for_dependency(span, attributes) @@ -383,7 +395,7 @@ def _generate_remote_operation(span: ReadableSpan) -> str: # pylint: disable=too-many-branches,too-many-statements -def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttributes) -> None: +def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttributes) -> bool: """ Remote resource attributes {@link AwsAttributeKeys#AWS_REMOTE_RESOURCE_TYPE} and {@link AwsAttributeKeys#AWS_REMOTE_RESOURCE_IDENTIFIER} are used to store information about the resource associated with @@ -403,9 +415,23 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri if is_key_present(span, _AWS_TABLE_NAMES) and len(span.attributes.get(_AWS_TABLE_NAMES)) == 1: remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table" remote_resource_identifier = _escape_delimiters(span.attributes.get(_AWS_TABLE_NAMES)[0]) + elif is_key_present(span, AWS_DYNAMODB_TABLE_ARN): + remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table" + remote_resource_identifier = _escape_delimiters( + RegionalResourceArnParser.extract_dynamodb_table_name_from_arn( + span.attributes.get(AWS_DYNAMODB_TABLE_ARN) + ) + ) elif is_key_present(span, AWS_KINESIS_STREAM_NAME): remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream" remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_KINESIS_STREAM_NAME)) + elif is_key_present(span, AWS_KINESIS_STREAM_ARN): + remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream" + remote_resource_identifier = _escape_delimiters( + RegionalResourceArnParser.extract_kinesis_stream_name_from_arn( + span.attributes.get(AWS_KINESIS_STREAM_ARN) + ) + ) elif is_key_present(span, _AWS_BUCKET_NAME): remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket" remote_resource_identifier = _escape_delimiters(span.attributes.get(_AWS_BUCKET_NAME)) @@ -442,27 +468,35 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri remote_resource_identifier = _escape_delimiters(span.attributes.get(GEN_AI_REQUEST_MODEL)) elif is_key_present(span, AWS_SECRETSMANAGER_SECRET_ARN): remote_resource_type = _NORMALIZED_SECRETSMANAGER_SERVICE_NAME + "::Secret" - remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_SECRETSMANAGER_SECRET_ARN)).split( - ":" - )[-1] + remote_resource_identifier = _escape_delimiters( + RegionalResourceArnParser.extract_resource_name_from_arn( + span.attributes.get(AWS_SECRETSMANAGER_SECRET_ARN) + ) + ) cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_SECRETSMANAGER_SECRET_ARN)) elif is_key_present(span, AWS_SNS_TOPIC_ARN): remote_resource_type = _NORMALIZED_SNS_SERVICE_NAME + "::Topic" - remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_SNS_TOPIC_ARN)).split(":")[-1] + remote_resource_identifier = _escape_delimiters( + RegionalResourceArnParser.extract_resource_name_from_arn(span.attributes.get(AWS_SNS_TOPIC_ARN)) + ) cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_SNS_TOPIC_ARN)) elif is_key_present(span, AWS_STEPFUNCTIONS_STATEMACHINE_ARN): remote_resource_type = _NORMALIZED_STEPFUNCTIONS_SERVICE_NAME + "::StateMachine" remote_resource_identifier = _escape_delimiters( - span.attributes.get(AWS_STEPFUNCTIONS_STATEMACHINE_ARN) - ).split(":")[-1] + RegionalResourceArnParser.extract_resource_name_from_arn( + span.attributes.get(AWS_STEPFUNCTIONS_STATEMACHINE_ARN) + ) + ) cloudformation_primary_identifier = _escape_delimiters( span.attributes.get(AWS_STEPFUNCTIONS_STATEMACHINE_ARN) ) elif is_key_present(span, AWS_STEPFUNCTIONS_ACTIVITY_ARN): remote_resource_type = _NORMALIZED_STEPFUNCTIONS_SERVICE_NAME + "::Activity" - remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_STEPFUNCTIONS_ACTIVITY_ARN)).split( - ":" - )[-1] + remote_resource_identifier = _escape_delimiters( + RegionalResourceArnParser.extract_resource_name_from_arn( + span.attributes.get(AWS_STEPFUNCTIONS_ACTIVITY_ARN) + ) + ) cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_STEPFUNCTIONS_ACTIVITY_ARN)) elif is_key_present(span, AWS_LAMBDA_FUNCTION_NAME): # For non-Invoke Lambda operations, treat Lambda as a resource, @@ -491,6 +525,48 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri attributes[AWS_REMOTE_RESOURCE_TYPE] = remote_resource_type attributes[AWS_REMOTE_RESOURCE_IDENTIFIER] = remote_resource_identifier attributes[AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER] = cloudformation_primary_identifier + return True + return False + + +def _set_remote_account_id_and_region(span: ReadableSpan, attributes: BoundedAttributes) -> bool: + arn_attributes = [ + AWS_DYNAMODB_TABLE_ARN, + AWS_KINESIS_STREAM_ARN, + AWS_SNS_TOPIC_ARN, + AWS_SECRETSMANAGER_SECRET_ARN, + AWS_STEPFUNCTIONS_STATEMACHINE_ARN, + AWS_STEPFUNCTIONS_ACTIVITY_ARN, + AWS_BEDROCK_GUARDRAIL_ARN, + AWS_LAMBDA_FUNCTION_ARN, + ] + remote_account_id: Optional[str] = None + remote_region: Optional[str] = None + + if is_key_present(span, AWS_SQS_QUEUE_URL): + queue_url = _escape_delimiters(span.attributes.get(AWS_SQS_QUEUE_URL)) + remote_account_id = SqsUrlParser.get_account_id(queue_url) + remote_region = SqsUrlParser.get_region(queue_url) + else: + for arn_attribute in arn_attributes: + if is_key_present(span, arn_attribute): + arn = span.attributes.get(arn_attribute) + remote_account_id = RegionalResourceArnParser.get_account_id(arn) + remote_region = RegionalResourceArnParser.get_region(arn) + break + + if remote_account_id is not None and remote_region is not None: + attributes[AWS_REMOTE_RESOURCE_ACCOUNT_ID] = remote_account_id + attributes[AWS_REMOTE_RESOURCE_REGION] = remote_region + return True + return False + + +def _set_remote_access_key_and_region(span: ReadableSpan, attributes: BoundedAttributes) -> None: + if is_key_present(span, AWS_AUTH_ACCESS_KEY): + attributes[AWS_REMOTE_RESOURCE_ACCESS_KEY] = span.attributes.get(AWS_AUTH_ACCESS_KEY) + if is_key_present(span, AWS_AUTH_REGION): + attributes[AWS_REMOTE_RESOURCE_REGION] = span.attributes.get(AWS_AUTH_REGION) def _set_remote_environment(span: ReadableSpan, attributes: BoundedAttributes) -> None: diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py index 129a4c0e2..b3f6c99be 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py @@ -75,3 +75,7 @@ def get_aws_region() -> Optional[str]: """ botocore_session = get_aws_session() return botocore_session.get_config_variable("region") if botocore_session else None + + +def is_account_id(input_str: str) -> bool: + return input_str is not None and input_str.isdigit() diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py index ffc81b348..7eb9a2066 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py @@ -3,7 +3,13 @@ # Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License. import importlib +from botocore.exceptions import ClientError + from amazon.opentelemetry.distro._aws_attribute_keys import ( + AWS_AUTH_ACCESS_KEY, + AWS_AUTH_REGION, + AWS_DYNAMODB_TABLE_ARN, + AWS_KINESIS_STREAM_ARN, AWS_KINESIS_STREAM_NAME, AWS_LAMBDA_FUNCTION_ARN, AWS_LAMBDA_FUNCTION_NAME, @@ -20,7 +26,14 @@ _BedrockAgentRuntimeExtension, _BedrockExtension, ) -from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS +from opentelemetry.instrumentation.botocore import ( + BotocoreInstrumentor, + _apply_response_attributes, + _determine_call_context, + _safe_invoke, +) +from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS, _find_extension +from opentelemetry.instrumentation.botocore.extensions.dynamodb import _DynamoDbExtension from opentelemetry.instrumentation.botocore.extensions.lmbd import _LambdaExtension from opentelemetry.instrumentation.botocore.extensions.sns import _SnsExtension from opentelemetry.instrumentation.botocore.extensions.sqs import _SqsExtension @@ -30,6 +43,8 @@ _BotocoreInstrumentorContext, _BotoResultT, ) +from opentelemetry.instrumentation.botocore.utils import get_server_attributes +from opentelemetry.instrumentation.utils import is_instrumentation_enabled, suppress_http_instrumentation from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.span import Span @@ -39,6 +54,7 @@ def _apply_botocore_instrumentation_patches() -> None: Adds patches to provide additional support and Java parity for Kinesis, S3, and SQS. """ + _apply_botocore_api_call_patch() _apply_botocore_kinesis_patch() _apply_botocore_s3_patch() _apply_botocore_sqs_patch() @@ -47,6 +63,7 @@ def _apply_botocore_instrumentation_patches() -> None: _apply_botocore_sns_patch() _apply_botocore_stepfunctions_patch() _apply_botocore_lambda_patch() + _apply_botocore_dynamodb_patch() def _apply_botocore_lambda_patch() -> None: @@ -208,6 +225,115 @@ def _apply_botocore_bedrock_patch() -> None: # bedrock-runtime is handled by upstream +def _apply_botocore_dynamodb_patch() -> None: + """Botocore instrumentation patch for DynamoDB + + This patch adds an extension to the upstream's list of known extensions for DynamoDB. + Extensions allow for custom logic for adding service-specific information to + spans, such as attributes. Specifically, we are adding logic to add the + `aws.table.arn` attribute, to be used to generate RemoteTarget and achieve + parity with the Java instrumentation. + """ + old_on_success = _DynamoDbExtension.on_success + + def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _BotocoreInstrumentorContext): + old_on_success(self, span, result, instrumentor_context) + table = result.get("Table", {}) + table_arn = table.get("TableArn") + if table_arn: + span.set_attribute(AWS_DYNAMODB_TABLE_ARN, table_arn) + + _DynamoDbExtension.on_success = patch_on_success + + +def _apply_botocore_api_call_patch() -> None: + # pylint: disable=too-many-locals + def patched_api_call(self, original_func, instance, args, kwargs): + """Botocore instrumentation patch to capture AWS authentication details + + This patch extends the upstream implementation to include additional AWS authentication + attributes: + - aws.auth.account.access_key + - aws.auth.region + + Note: Current implementation duplicates upstream code in v1.33.x-0.54bx. Future improvements should: + 1. Propose refactoring upstream _patched_api_call into smaller components + 2. Apply targeted patches to these components to reduce code duplication + + Reference: https://github.com/open-telemetry/opentelemetry-python-contrib/blob/ + release/v1.33.x-0.54bx/instrumentation/opentelemetry-instrumentation-botocore/src/ + opentelemetry/instrumentation/botocore/__init__.py#L263 + """ + if not is_instrumentation_enabled(): + return original_func(*args, **kwargs) + + call_context = _determine_call_context(instance, args) + if call_context is None: + return original_func(*args, **kwargs) + + extension = _find_extension(call_context) + if not extension.should_trace_service_call(): + return original_func(*args, **kwargs) + + attributes = { + SpanAttributes.RPC_SYSTEM: "aws-api", + SpanAttributes.RPC_SERVICE: call_context.service_id, + SpanAttributes.RPC_METHOD: call_context.operation, + # TODO: update when semantic conventions exist + "aws.region": call_context.region, + **get_server_attributes(call_context.endpoint_url), + AWS_AUTH_REGION: call_context.region, + } + + credentials = instance._get_credentials() + if credentials is not None: + access_key = credentials.access_key + if access_key is not None: + attributes[AWS_AUTH_ACCESS_KEY] = access_key + + _safe_invoke(extension.extract_attributes, attributes) + end_span_on_exit = extension.should_end_span_on_exit() + + tracer = self._get_tracer(extension) + event_logger = self._get_event_logger(extension) + meter = self._get_meter(extension) + metrics = self._get_metrics(extension, meter) + instrumentor_ctx = _BotocoreInstrumentorContext( + event_logger=event_logger, + metrics=metrics, + ) + with tracer.start_as_current_span( + call_context.span_name, + kind=call_context.span_kind, + attributes=attributes, + # tracing streaming services require to close the span manually + # at a later time after the stream has been consumed + end_on_exit=end_span_on_exit, + ) as span: + _safe_invoke(extension.before_service_call, span, instrumentor_ctx) + self._call_request_hook(span, call_context) + + try: + with suppress_http_instrumentation(): + result = None + try: + result = original_func(*args, **kwargs) + except ClientError as error: + result = getattr(error, "response", None) + _apply_response_attributes(span, result) + _safe_invoke(extension.on_error, span, error, instrumentor_ctx) + raise + _apply_response_attributes(span, result) + _safe_invoke(extension.on_success, span, result, instrumentor_ctx) + finally: + _safe_invoke(extension.after_service_call, instrumentor_ctx) + self._call_response_hook(span, call_context, result) + + return result + + BotocoreInstrumentor._patched_api_call = patched_api_call + + # The OpenTelemetry Authors code def _lazy_load(module, cls): """Clone of upstream opentelemetry.instrumentation.botocore.extensions.lazy_load @@ -265,3 +391,6 @@ def extract_attributes(self, attributes: _AttributeMapT): stream_name = self._call_context.params.get("StreamName") if stream_name: attributes[AWS_KINESIS_STREAM_NAME] = stream_name + stream_arn = self._call_context.params.get("StreamARN") + if stream_arn: + attributes[AWS_KINESIS_STREAM_ARN] = stream_arn diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/regional_resource_arn_parser.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/regional_resource_arn_parser.py new file mode 100644 index 000000000..97c1af8c9 --- /dev/null +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/regional_resource_arn_parser.py @@ -0,0 +1,39 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from typing import Optional + +from amazon.opentelemetry.distro._utils import is_account_id + + +class RegionalResourceArnParser: + @staticmethod + def get_account_id(arn: str) -> Optional[str]: + parts = _get_arn_parts(arn) + return parts[4] if parts else None + + @staticmethod + def get_region(arn: str) -> Optional[str]: + parts = _get_arn_parts(arn) + return parts[3] if parts else None + + @staticmethod + def extract_dynamodb_table_name_from_arn(arn: str) -> Optional[str]: + parts = _get_arn_parts(arn) + return parts[-1].replace("table/", "") if parts else None + + @staticmethod + def extract_kinesis_stream_name_from_arn(arn: str) -> Optional[str]: + parts = _get_arn_parts(arn) + return parts[-1].replace("stream/", "") if parts else None + + @staticmethod + def extract_resource_name_from_arn(arn: str) -> Optional[str]: + parts = _get_arn_parts(arn) + return parts[-1] if parts else None + + +def _get_arn_parts(arn: str) -> Optional[list]: + if not arn or not arn.startswith("arn"): + return None + parts = arn.split(":") + return parts if len(parts) >= 6 and is_account_id(parts[4]) else None diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py index 4cc5e2935..03b26fc1a 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py @@ -1,6 +1,8 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -from typing import List, Optional +from typing import List, Optional, Tuple + +from amazon.opentelemetry.distro._utils import is_account_id _HTTP_SCHEMA: str = "http://" _HTTPS_SCHEMA: str = "https://" @@ -12,28 +14,54 @@ def get_queue_name(url: str) -> Optional[str]: """ Best-effort logic to extract queue name from an HTTP url. This method should only be used with a string that is, with reasonably high confidence, an SQS queue URL. Handles new/legacy/some custom URLs. Essentially, we require - that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be a - 12-digit account id, and the third part should be a valid queue name, per SQS naming conventions. + that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be an + account id consisting of digits, and the third part should be a valid queue name, per SQS naming conventions. """ if url is None: return None - url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "") - split_url: List[Optional[str]] = url.split("/") - if len(split_url) == 3 and _is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]): + url_without_protocol = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "") + split_url: List[Optional[str]] = url_without_protocol.split("/") + if len(split_url) == 3 and is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]): return split_url[2] return None + @staticmethod + def get_account_id(url: str) -> Optional[str]: + """ + Extracts the account ID from an SQS URL. + """ + return SqsUrlParser.parse_url(url)[1] -def _is_account_id(input_str: str) -> bool: - if input_str is None or len(input_str) != 12: - return False + @staticmethod + def get_region(url: str) -> Optional[str]: + """ + Extracts the region from an SQS URL. + """ + return SqsUrlParser.parse_url(url)[2] - try: - int(input_str) - except ValueError: - return False + @staticmethod + def parse_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str]]: + """ + Parses an SQS URL and extracts its components. + URL Format: https://sqs..amazonaws.com// + """ + if url is None: + return None, None, None - return True + url_without_protocol = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "") + split_url: List[Optional[str]] = url_without_protocol.split("/") + if ( + len(split_url) != 3 + or not is_account_id(split_url[1]) + or not _is_valid_queue_name(split_url[2]) + or not split_url[0].lower().startswith("sqs") + ): + return None, None, None + + domain: str = split_url[0] + domain_parts: List[str] = domain.split(".") + + return split_url[2], split_url[1], domain_parts[1] if len(domain_parts) == 4 else None def _is_valid_queue_name(input_str: str) -> bool: diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py index f99b0d154..abf03bba1 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py @@ -9,6 +9,8 @@ from unittest.mock import MagicMock from amazon.opentelemetry.distro._aws_attribute_keys import ( + AWS_AUTH_ACCESS_KEY, + AWS_AUTH_REGION, AWS_BEDROCK_AGENT_ID, AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_GUARDRAIL_ARN, @@ -16,6 +18,8 @@ AWS_BEDROCK_KNOWLEDGE_BASE_ID, AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER, AWS_CONSUMER_PARENT_SPAN_KIND, + AWS_DYNAMODB_TABLE_ARN, + AWS_KINESIS_STREAM_ARN, AWS_KINESIS_STREAM_NAME, AWS_LAMBDA_FUNCTION_ARN, AWS_LAMBDA_FUNCTION_NAME, @@ -25,7 +29,10 @@ AWS_REMOTE_DB_USER, AWS_REMOTE_ENVIRONMENT, AWS_REMOTE_OPERATION, + AWS_REMOTE_RESOURCE_ACCESS_KEY, + AWS_REMOTE_RESOURCE_ACCOUNT_ID, AWS_REMOTE_RESOURCE_IDENTIFIER, + AWS_REMOTE_RESOURCE_REGION, AWS_REMOTE_RESOURCE_TYPE, AWS_REMOTE_SERVICE, AWS_SECRETSMANAGER_SECRET_ARN, @@ -54,6 +61,8 @@ _AWS_REMOTE_OPERATION_VALUE: str = "AWS remote operation" _SERVICE_NAME_VALUE: str = "Service name" _SPAN_NAME_VALUE: str = "Span name" +_AWS_REMOTE_RESOURCE_REGION: str = "us-east-1" +_AWS_REMOTE_RESOURCE_ACCESS_KEY: str = "AWS access key" _UNKNOWN_SERVICE: str = "UnknownService" _UNKNOWN_OPERATION: str = "UnknownOperation" @@ -1016,21 +1025,31 @@ def _validate_peer_service_does_override(self, remote_service_key: str) -> None: self._mock_attribute([remote_service_key, SpanAttributes.PEER_SERVICE], [None, None]) def test_sdk_client_span_with_remote_resource_attributes(self): - keys: List[str] = [ - SpanAttributes.RPC_SYSTEM, - ] - values: List[str] = [ - "aws-api", - ] + keys: List[str] = [SpanAttributes.RPC_SYSTEM, AWS_AUTH_ACCESS_KEY, AWS_AUTH_REGION] + values: List[str] = ["aws-api", _AWS_REMOTE_RESOURCE_ACCESS_KEY, _AWS_REMOTE_RESOURCE_REGION] self._mock_attribute(keys, values) # Validate behaviour of aws bucket name attribute, then remove it. self._mock_attribute([SpanAttributes.AWS_S3_BUCKET], ["aws_s3_bucket_name"], keys, values) - self._validate_remote_resource_attributes("AWS::S3::Bucket", "aws_s3_bucket_name") + self._validate_remote_resource_attributes( + "AWS::S3::Bucket", + "aws_s3_bucket_name", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([SpanAttributes.AWS_S3_BUCKET], [None]) # Validate behaviour of AWS_SQS_QUEUE_NAME attribute, then remove it self._mock_attribute([AWS_SQS_QUEUE_NAME], ["aws_queue_name"], keys, values) - self._validate_remote_resource_attributes("AWS::SQS::Queue", "aws_queue_name") + self._validate_remote_resource_attributes( + "AWS::SQS::Queue", + "aws_queue_name", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_SQS_QUEUE_NAME], [None]) # Validate behaviour of having both AWS_SQS_QUEUE_NAME and AWS_SQS_QUEUE_URL attribute, then remove them. @@ -1042,23 +1061,72 @@ def test_sdk_client_span_with_remote_resource_attributes(self): values, ) self._validate_remote_resource_attributes( - "AWS::SQS::Queue", "aws_queue_name", "https://sqs.us-east-2.amazonaws.com/123456789012/Queue" + "AWS::SQS::Queue", + "aws_queue_name", + "https://sqs.us-east-2.amazonaws.com/123456789012/Queue", + "us-east-2", + "123456789012", + None, ) self._mock_attribute([AWS_SQS_QUEUE_URL, AWS_SQS_QUEUE_NAME], [None, None]) # Valid queue name with invalid queue URL, we should default to using the queue name. self._mock_attribute([AWS_SQS_QUEUE_URL, AWS_SQS_QUEUE_NAME], ["invalidUrl", "aws_queue_name"], keys, values) - self._validate_remote_resource_attributes("AWS::SQS::Queue", "aws_queue_name", "invalidUrl") + self._validate_remote_resource_attributes( + "AWS::SQS::Queue", + "aws_queue_name", + "invalidUrl", + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_SQS_QUEUE_URL, AWS_SQS_QUEUE_NAME], [None, None]) + # Validate behaviour of AWS_KINESIS_STREAM_ARN attribute, then remove it. + self._mock_attribute( + [AWS_KINESIS_STREAM_ARN], ["arn:aws:kinesis:us-west-2:123456789012:stream/aws_stream_name"], keys, values + ) + self._validate_remote_resource_attributes( + "AWS::Kinesis::Stream", + "aws_stream_name", + None, + "us-west-2", + "123456789012", + None, + ) + self._mock_attribute([AWS_KINESIS_STREAM_ARN], [None]) + # Validate behaviour of AWS_KINESIS_STREAM_NAME attribute, then remove it. self._mock_attribute([AWS_KINESIS_STREAM_NAME], ["aws_stream_name"], keys, values) - self._validate_remote_resource_attributes("AWS::Kinesis::Stream", "aws_stream_name") + self._validate_remote_resource_attributes( + "AWS::Kinesis::Stream", + "aws_stream_name", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_KINESIS_STREAM_NAME], [None]) + # Validate behaviour of AWS_DYNAMODB_TABLE_ARN attribute, then remove it. + self._mock_attribute( + [AWS_DYNAMODB_TABLE_ARN], ["arn:aws:dynamodb:us-west-2:123456789012:table/aws_table_name"], keys, values + ) + self._validate_remote_resource_attributes( + "AWS::DynamoDB::Table", "aws_table_name", None, "us-west-2", "123456789012", None + ) + self._mock_attribute([AWS_DYNAMODB_TABLE_ARN], [None]) + # Validate behaviour of SpanAttributes.AWS_DYNAMODB_TABLE_NAMES attribute with one table name, then remove it. self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [["aws_table_name"]], keys, values) - self._validate_remote_resource_attributes("AWS::DynamoDB::Table", "aws_table_name") + self._validate_remote_resource_attributes( + "AWS::DynamoDB::Table", + "aws_table_name", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [None]) # Validate behaviour of SpanAttributes.AWS_DYNAMODB_TABLE_NAMES attribute with no table name, then remove it. @@ -1075,22 +1143,50 @@ def test_sdk_client_span_with_remote_resource_attributes(self): # Validate behaviour of AWS_TABLE_NAME attribute with special chars(|), then remove it. self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [["aws_table|name"]], keys, values) - self._validate_remote_resource_attributes("AWS::DynamoDB::Table", "aws_table^|name") + self._validate_remote_resource_attributes( + "AWS::DynamoDB::Table", + "aws_table^|name", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [None]) # Validate behaviour of AWS_TABLE_NAME attribute with special chars(^), then remove it. self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [["aws_table^name"]], keys, values) - self._validate_remote_resource_attributes("AWS::DynamoDB::Table", "aws_table^^name") + self._validate_remote_resource_attributes( + "AWS::DynamoDB::Table", + "aws_table^^name", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([SpanAttributes.AWS_DYNAMODB_TABLE_NAMES], [None]) # Validate behaviour of AWS_BEDROCK_AGENT_ID attribute, then remove it. self._mock_attribute([AWS_BEDROCK_AGENT_ID], ["test_agent_id"], keys, values) - self._validate_remote_resource_attributes("AWS::Bedrock::Agent", "test_agent_id") + self._validate_remote_resource_attributes( + "AWS::Bedrock::Agent", + "test_agent_id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_BEDROCK_AGENT_ID], [None]) # Validate behaviour of AWS_BEDROCK_AGENT_ID attribute with special chars(^), then remove it. self._mock_attribute([AWS_BEDROCK_AGENT_ID], ["test_agent_^id"], keys, values) - self._validate_remote_resource_attributes("AWS::Bedrock::Agent", "test_agent_^^id") + self._validate_remote_resource_attributes( + "AWS::Bedrock::Agent", + "test_agent_^^id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_BEDROCK_AGENT_ID], [None]) # Validate behaviour of AWS_BEDROCK_DATA_SOURCE_ID attribute, then remove it. @@ -1101,7 +1197,12 @@ def test_sdk_client_span_with_remote_resource_attributes(self): values, ) self._validate_remote_resource_attributes( - "AWS::Bedrock::DataSource", "test_datasource_id", "test_knowledge_base_id|test_datasource_id" + "AWS::Bedrock::DataSource", + "test_datasource_id", + "test_knowledge_base_id|test_datasource_id", + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, ) self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID], [None, None]) @@ -1113,7 +1214,12 @@ def test_sdk_client_span_with_remote_resource_attributes(self): values, ) self._validate_remote_resource_attributes( - "AWS::Bedrock::DataSource", "test_datasource_^^id", "test_knowledge_base_^^id|test_datasource_^^id" + "AWS::Bedrock::DataSource", + "test_datasource_^^id", + "test_knowledge_base_^^id|test_datasource_^^id", + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, ) self._mock_attribute([AWS_BEDROCK_DATA_SOURCE_ID, AWS_BEDROCK_KNOWLEDGE_BASE_ID], [None, None]) @@ -1128,6 +1234,9 @@ def test_sdk_client_span_with_remote_resource_attributes(self): "AWS::Bedrock::Guardrail", "test_guardrail_id", "arn:aws:bedrock:us-east-1:123456789012:guardrail/test_guardrail_id", + "us-east-1", + "123456789012", + None, ) self._mock_attribute([AWS_BEDROCK_GUARDRAIL_ID, AWS_BEDROCK_GUARDRAIL_ARN], [None, None]) @@ -1142,27 +1251,58 @@ def test_sdk_client_span_with_remote_resource_attributes(self): "AWS::Bedrock::Guardrail", "test_guardrail_^^id", "arn:aws:bedrock:us-east-1:123456789012:guardrail/test_guardrail_^^id", + "us-east-1", + "123456789012", + None, ) self._mock_attribute([AWS_BEDROCK_GUARDRAIL_ID, AWS_BEDROCK_GUARDRAIL_ARN], [None, None]) # Validate behaviour of AWS_BEDROCK_KNOWLEDGE_BASE_ID attribute, then remove it. self._mock_attribute([AWS_BEDROCK_KNOWLEDGE_BASE_ID], ["test_knowledgeBase_id"], keys, values) - self._validate_remote_resource_attributes("AWS::Bedrock::KnowledgeBase", "test_knowledgeBase_id") + self._validate_remote_resource_attributes( + "AWS::Bedrock::KnowledgeBase", + "test_knowledgeBase_id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_BEDROCK_KNOWLEDGE_BASE_ID], [None]) # Validate behaviour of AWS_BEDROCK_KNOWLEDGE_BASE_ID attribute with special chars(^), then remove it. self._mock_attribute([AWS_BEDROCK_KNOWLEDGE_BASE_ID], ["test_knowledgeBase_^id"], keys, values) - self._validate_remote_resource_attributes("AWS::Bedrock::KnowledgeBase", "test_knowledgeBase_^^id") + self._validate_remote_resource_attributes( + "AWS::Bedrock::KnowledgeBase", + "test_knowledgeBase_^^id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_BEDROCK_KNOWLEDGE_BASE_ID], [None]) # Validate behaviour of GEN_AI_REQUEST_MODEL attribute, then remove it. self._mock_attribute([GEN_AI_REQUEST_MODEL], ["test.service_id"], keys, values) - self._validate_remote_resource_attributes("AWS::Bedrock::Model", "test.service_id") + self._validate_remote_resource_attributes( + "AWS::Bedrock::Model", + "test.service_id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([GEN_AI_REQUEST_MODEL], [None]) # Validate behaviour of GEN_AI_REQUEST_MODEL attribute with special chars(^), then remove it. self._mock_attribute([GEN_AI_REQUEST_MODEL], ["test.service_^id"], keys, values) - self._validate_remote_resource_attributes("AWS::Bedrock::Model", "test.service_^^id") + self._validate_remote_resource_attributes( + "AWS::Bedrock::Model", + "test.service_^^id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([GEN_AI_REQUEST_MODEL], [None]) # Validate behaviour of AWS_SECRETSMANAGER_SECRET_ARN attribute, then remove it. @@ -1176,13 +1316,21 @@ def test_sdk_client_span_with_remote_resource_attributes(self): "AWS::SecretsManager::Secret", "secret_name-lERW9H", "arn:aws:secretsmanager:us-east-1:123456789012:secret:secret_name-lERW9H", + "us-east-1", + "123456789012", + None, ) self._mock_attribute([AWS_SECRETSMANAGER_SECRET_ARN], [None]) # Validate behaviour of AWS_SNS_TOPIC_ARN attribute, then remove it. self._mock_attribute([AWS_SNS_TOPIC_ARN], ["arn:aws:sns:us-west-2:012345678901:test_topic"], keys, values) self._validate_remote_resource_attributes( - "AWS::SNS::Topic", "test_topic", "arn:aws:sns:us-west-2:012345678901:test_topic" + "AWS::SNS::Topic", + "test_topic", + "arn:aws:sns:us-west-2:012345678901:test_topic", + "us-west-2", + "012345678901", + None, ) self._mock_attribute([AWS_SNS_TOPIC_ARN], [None]) @@ -1197,26 +1345,39 @@ def test_sdk_client_span_with_remote_resource_attributes(self): "AWS::StepFunctions::StateMachine", "test_state_machine", "arn:aws:states:us-east-1:123456789012:stateMachine:test_state_machine", + "us-east-1", + "123456789012", + None, ) self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None]) # Validate behaviour of AWS_STEPFUNCTIONS_ACTIVITY_ARN attribute, then remove it. self._mock_attribute( [AWS_STEPFUNCTIONS_ACTIVITY_ARN], - ["arn:aws:states:us-east-1:007003123456789012:activity:testActivity"], + ["arn:aws:states:us-east-1:123456789012:activity:testActivity"], keys, values, ) self._validate_remote_resource_attributes( "AWS::StepFunctions::Activity", "testActivity", - "arn:aws:states:us-east-1:007003123456789012:activity:testActivity", + "arn:aws:states:us-east-1:123456789012:activity:testActivity", + "us-east-1", + "123456789012", + None, ) self._mock_attribute([AWS_STEPFUNCTIONS_ACTIVITY_ARN], [None]) # Validate behaviour of AWS_LAMBDA_RESOURCEMAPPING_ID attribute, then remove it. self._mock_attribute([AWS_LAMBDA_RESOURCEMAPPING_ID], ["aws_event_source_mapping_id"], keys, values) - self._validate_remote_resource_attributes("AWS::Lambda::EventSourceMapping", "aws_event_source_mapping_id") + self._validate_remote_resource_attributes( + "AWS::Lambda::EventSourceMapping", + "aws_event_source_mapping_id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_LAMBDA_RESOURCEMAPPING_ID], [None]) # Validate behaviour of AWS_LAMBDA_RESOURCE_MAPPING_ID, @@ -1227,7 +1388,14 @@ def test_sdk_client_span_with_remote_resource_attributes(self): keys, values, ) - self._validate_remote_resource_attributes("AWS::Lambda::EventSourceMapping", "aws_event_source_mapping_id") + self._validate_remote_resource_attributes( + "AWS::Lambda::EventSourceMapping", + "aws_event_source_mapping_id", + None, + _AWS_REMOTE_RESOURCE_REGION, + None, + _AWS_REMOTE_RESOURCE_ACCESS_KEY, + ) self._mock_attribute([AWS_LAMBDA_RESOURCEMAPPING_ID], [None]) # Test AWS Lambda Invoke scenario with default lambda remote environment @@ -1281,7 +1449,9 @@ def test_sdk_client_span_with_remote_resource_attributes(self): keys, values, ) - self._validate_remote_resource_attributes("AWS::Lambda::Function", "testLambdaFunction", lambda_arn) + self._validate_remote_resource_attributes( + "AWS::Lambda::Function", "testLambdaFunction", lambda_arn, "us-east-1", "123456789012", None + ) self._mock_attribute( [AWS_LAMBDA_FUNCTION_NAME, AWS_LAMBDA_FUNCTION_ARN, SpanAttributes.RPC_METHOD], [None, None, None] ) @@ -1306,7 +1476,12 @@ def test_sdk_client_span_with_remote_resource_attributes(self): values, ) self._validate_remote_resource_attributes( - "AWS::Lambda::Function", "testLambdaName", "arn:aws:lambda:us-east-1:123456789012:function:testLambdaName" + "AWS::Lambda::Function", + "testLambdaName", + "arn:aws:lambda:us-east-1:123456789012:function:testLambdaName", + "us-east-1", + "123456789012", + None, ) self._mock_attribute( [ @@ -1342,6 +1517,69 @@ def test_sdk_client_span_with_remote_resource_attributes(self): [None, None, None, None], ) + # Cross account support + # Invalid arn but account access key is available + self._mock_attribute( + [AWS_STEPFUNCTIONS_STATEMACHINE_ARN], + ["invalid_arn"], + keys, + values, + ) + self._validate_remote_resource_attributes(None, None, None) + self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None]) + + # Invalid arn and no account access key + self._mock_attribute( + [AWS_STEPFUNCTIONS_STATEMACHINE_ARN, SpanAttributes.RPC_SYSTEM], ["invalid_arn", "aws-api"] + ) + self._validate_remote_resource_attributes(None, None, None) + self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None]) + + # Both account access key and account id are not available + self._mock_attribute( + [SpanAttributes.AWS_S3_BUCKET, SpanAttributes.RPC_SYSTEM], ["aws_s3_bucket_name", "aws-api"] + ) + self._validate_remote_resource_attributes("AWS::S3::Bucket", "aws_s3_bucket_name") + self._mock_attribute([SpanAttributes.AWS_S3_BUCKET], [None]) + + # Account access key is not available + self._mock_attribute( + [AWS_STEPFUNCTIONS_STATEMACHINE_ARN, SpanAttributes.RPC_SYSTEM], + ["arn:aws:states:us-east-1:123456789123:stateMachine:testStateMachine", "aws-api"], + ) + self._validate_remote_resource_attributes( + "AWS::StepFunctions::StateMachine", + "testStateMachine", + "arn:aws:states:us-east-1:123456789123:stateMachine:testStateMachine", + "us-east-1", + "123456789123", + None, + ) + self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None]) + + # Arn with invalid account id + self._mock_attribute( + [AWS_STEPFUNCTIONS_STATEMACHINE_ARN, SpanAttributes.RPC_SYSTEM], + ["arn:aws:states:us-east-1:invalid_account_id:stateMachine:testStateMachine", "aws-api"], + ) + self._validate_remote_resource_attributes(None, None, None) + self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None]) + + # Arn with invalid region + self._mock_attribute( + [AWS_STEPFUNCTIONS_STATEMACHINE_ARN, SpanAttributes.RPC_SYSTEM], + ["arn:aws:states:invalid_region:123456789123:stateMachine:testStateMachine", "aws-api"], + ) + self._validate_remote_resource_attributes( + "AWS::StepFunctions::StateMachine", + "testStateMachine", + "arn:aws:states:invalid_region:123456789123:stateMachine:testStateMachine", + "invalid_region", + "123456789123", + None, + ) + self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None]) + self._mock_attribute([SpanAttributes.RPC_SYSTEM], [None]) def test_client_db_span_with_remote_resource_attributes(self): @@ -1621,36 +1859,45 @@ def test_client_db_span_with_remote_resource_attributes(self): ) def _validate_remote_resource_attributes( - self, expected_type: str, expected_identifier: str, expected_cfn_primary_id: str = None + self, + expected_type: str, + expected_identifier: str, + expected_cfn_primary_id: str = None, + expected_region: str = None, + expected_account_id: str = None, + expected_access_key: str = None, ) -> None: # If expected_cfn_primary_id is not provided, it defaults to expected_identifier if expected_cfn_primary_id is None: expected_cfn_primary_id = expected_identifier # Client, Producer, and Consumer spans should generate the expected remote resource attribute - self.span_mock.kind = SpanKind.CLIENT - actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( - DEPENDENCY_METRIC - ) - self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) - self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) - self.assertEqual(expected_cfn_primary_id, actual_attributes.get(AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER)) + for kind in [SpanKind.CLIENT, SpanKind.PRODUCER, SpanKind.CONSUMER]: + self.span_mock.kind = kind + actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( + DEPENDENCY_METRIC + ) + self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) + self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) + self.assertEqual(expected_cfn_primary_id, actual_attributes.get(AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER)) + + # Cross account support + if expected_region is not None: + self.assertEqual(expected_region, actual_attributes.get(AWS_REMOTE_RESOURCE_REGION)) + else: + self.assertNotIn(AWS_REMOTE_RESOURCE_REGION, actual_attributes) - self.span_mock.kind = SpanKind.PRODUCER - actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( - DEPENDENCY_METRIC - ) - self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) - self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) - self.assertEqual(expected_cfn_primary_id, actual_attributes.get(AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER)) + if expected_access_key is not None: + self.assertEqual(expected_access_key, actual_attributes.get(AWS_REMOTE_RESOURCE_ACCESS_KEY)) + self.assertNotIn(AWS_REMOTE_RESOURCE_ACCOUNT_ID, actual_attributes) + else: + self.assertNotIn(AWS_REMOTE_RESOURCE_ACCESS_KEY, actual_attributes) - self.span_mock.kind = SpanKind.CONSUMER - actual_attributes = _GENERATOR.generate_metric_attributes_dict_from_span(self.span_mock, self.resource).get( - DEPENDENCY_METRIC - ) - self.assertEqual(expected_type, actual_attributes.get(AWS_REMOTE_RESOURCE_TYPE)) - self.assertEqual(expected_identifier, actual_attributes.get(AWS_REMOTE_RESOURCE_IDENTIFIER)) - self.assertEqual(expected_cfn_primary_id, actual_attributes.get(AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER)) + if expected_account_id is not None: + self.assertEqual(expected_account_id, actual_attributes.get(AWS_REMOTE_RESOURCE_ACCOUNT_ID)) + self.assertNotIn(AWS_REMOTE_RESOURCE_ACCESS_KEY, actual_attributes) + else: + self.assertNotIn(AWS_REMOTE_RESOURCE_ACCOUNT_ID, actual_attributes) # Server span should not generate remote resource attribute self.span_mock.kind = SpanKind.SERVER @@ -1660,6 +1907,9 @@ def _validate_remote_resource_attributes( self.assertNotIn(AWS_REMOTE_RESOURCE_TYPE, actual_attributes) self.assertNotIn(AWS_REMOTE_RESOURCE_IDENTIFIER, actual_attributes) self.assertNotIn(AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER, actual_attributes) + self.assertNotIn(AWS_REMOTE_RESOURCE_REGION, actual_attributes) + self.assertNotIn(AWS_REMOTE_RESOURCE_ACCOUNT_ID, actual_attributes) + self.assertNotIn(AWS_REMOTE_RESOURCE_ACCESS_KEY, actual_attributes) self._mock_attribute([SpanAttributes.DB_SYSTEM], [None]) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py index 8eff6f2e6..857c13265 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py @@ -14,10 +14,12 @@ AWS_GEVENT_PATCH_MODULES, apply_instrumentation_patches, ) +from opentelemetry.instrumentation.botocore import BotocoreInstrumentor from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.span import Span +_STREAM_ARN: str = "arn:aws:kinesis:us-west-2:000000000000:stream/streamName" _STREAM_NAME: str = "streamName" _BUCKET_NAME: str = "bucketName" _QUEUE_NAME: str = "queueName" @@ -34,6 +36,7 @@ _ACTIVITY_ARN: str = "arn:aws:states:us-east-1:007003123456789012:activity:testActivity" _LAMBDA_FUNCTION_NAME: str = "lambdaFunctionName" _LAMBDA_SOURCE_MAPPING_ID: str = "lambdaEventSourceMappingID" +_TABLE_ARN: str = "arn:aws:dynamodb:us-west-2:123456789012:table/testTable" # Patch names IMPORTLIB_METADATA_VERSION_PATCH: str = "amazon.opentelemetry.distro._utils.version" @@ -161,6 +164,9 @@ def _test_unpatched_botocore_instrumentation(self): # Lambda self.assertTrue("lambda" in _KNOWN_EXTENSIONS, "Upstream has removed the Lambda extension") + # DynamoDB + self.assertTrue("dynamodb" in _KNOWN_EXTENSIONS, "Upstream has removed a DynamoDB extension") + def _test_unpatched_gevent_instrumentation(self): self.assertFalse(gevent.monkey.is_module_patched("os"), "gevent os module has been patched") self.assertFalse(gevent.monkey.is_module_patched("thread"), "gevent thread module has been patched") @@ -182,6 +188,8 @@ def _test_patched_botocore_instrumentation(self): kinesis_attributes: Dict[str, str] = _do_extract_kinesis_attributes() self.assertTrue("aws.kinesis.stream.name" in kinesis_attributes) self.assertEqual(kinesis_attributes["aws.kinesis.stream.name"], _STREAM_NAME) + self.assertTrue("aws.kinesis.stream.arn" in kinesis_attributes) + self.assertEqual(kinesis_attributes["aws.kinesis.stream.arn"], _STREAM_ARN) # S3 self.assertTrue("s3" in _KNOWN_EXTENSIONS) @@ -247,6 +255,162 @@ def _test_patched_botocore_instrumentation(self): self.assertTrue("aws.lambda.resource_mapping.id" in lambda_attributes) self.assertEqual(lambda_attributes["aws.lambda.resource_mapping.id"], _LAMBDA_SOURCE_MAPPING_ID) + # DynamoDB + self.assertTrue("dynamodb" in _KNOWN_EXTENSIONS) + dynamodb_success_attributes: Dict[str, str] = _do_on_success_dynamodb() + self.assertTrue("aws.dynamodb.table.arn" in dynamodb_success_attributes) + self.assertEqual(dynamodb_success_attributes["aws.dynamodb.table.arn"], _TABLE_ARN) + + # Access key + self._test_patched_api_call_with_credentials() + self._test_patched_api_call_with_no_credentials() + self._test_patched_api_call_with_no_access_key() + + def _test_patched_api_call_with_credentials(self): + # Create mocks + mock_tracer = MagicMock() + original_func: MagicMock = MagicMock(return_value={"ResponseMetadata": {"RequestId": "12345"}}) + instance: MagicMock = MagicMock() + span: MagicMock = MagicMock() + args = ("operation_name",) + kwargs = {} + initial_attributes = {} + mock_extension = _get_mock_extension() + mock_call_context = _get_mock_call_context() + + def mock_start_span(*args, **kwargs): + attributes = kwargs.get("attributes", {}) + initial_attributes.update(attributes) + cm = MagicMock() + cm.__enter__ = MagicMock(return_value=span) + cm.__exit__ = MagicMock(return_value=None) + return cm + + mock_tracer.start_as_current_span.side_effect = mock_start_span + + # Mock credentials + mock_credentials = MagicMock() + mock_credentials.access_key = "test-access-key" + instance._get_credentials.return_value = mock_credentials + instance.meta.region_name = "us-west-2" + + with patch( + "opentelemetry.instrumentation.botocore._determine_call_context", return_value=mock_call_context + ), patch("opentelemetry.instrumentation.botocore._find_extension", return_value=mock_extension), patch( + "opentelemetry.instrumentation.botocore.is_instrumentation_enabled", return_value=True + ), patch( + "amazon.opentelemetry.distro.patches._botocore_patches.get_server_attributes", return_value={} + ), patch( + "opentelemetry.instrumentation.botocore.get_tracer", return_value=mock_tracer + ), patch( + "opentelemetry.instrumentation.botocore.get_event_logger", return_value=MagicMock() + ), patch( + "opentelemetry.instrumentation.botocore.get_meter", return_value=MagicMock() + ): + instrumentor = BotocoreInstrumentor() + instrumentor.instrument() + instrumentor._patched_api_call(original_func, instance, args, kwargs) + + self.assertIn("aws.auth.account.access_key", initial_attributes) + self.assertEqual(initial_attributes["aws.auth.account.access_key"], "test-access-key") + self.assertIn("aws.auth.region", initial_attributes) + self.assertEqual(initial_attributes["aws.auth.region"], "us-west-2") + instrumentor.uninstrument() + + def _test_patched_api_call_with_no_credentials(self): + # Create mocks + mock_tracer = MagicMock() + original_func: MagicMock = MagicMock(return_value={"ResponseMetadata": {"RequestId": "12345"}}) + instance: MagicMock = MagicMock() + span: MagicMock = MagicMock() + args = ("operation_name",) + kwargs = {} + initial_attributes = {} + mock_extension = _get_mock_extension() + mock_call_context = _get_mock_call_context() + + def mock_start_span(*args, **kwargs): + attributes = kwargs.get("attributes", {}) + initial_attributes.update(attributes) + cm = MagicMock() + cm.__enter__ = MagicMock(return_value=span) + cm.__exit__ = MagicMock(return_value=None) + return cm + + mock_tracer.start_as_current_span.side_effect = mock_start_span + + # Mock credentials + instance._get_credentials.return_value = None + + with patch( + "opentelemetry.instrumentation.botocore._determine_call_context", return_value=mock_call_context + ), patch("opentelemetry.instrumentation.botocore._find_extension", return_value=mock_extension), patch( + "opentelemetry.instrumentation.botocore.is_instrumentation_enabled", return_value=True + ), patch( + "amazon.opentelemetry.distro.patches._botocore_patches.get_server_attributes", return_value={} + ), patch( + "opentelemetry.instrumentation.botocore.get_tracer", return_value=mock_tracer + ), patch( + "opentelemetry.instrumentation.botocore.get_event_logger", return_value=MagicMock() + ), patch( + "opentelemetry.instrumentation.botocore.get_meter", return_value=MagicMock() + ): + instrumentor = BotocoreInstrumentor() + instrumentor.instrument() + instrumentor._patched_api_call(original_func, instance, args, kwargs) + + self.assertFalse("aws.auth.account.access_key" in initial_attributes) + self.assertTrue("aws.region" in initial_attributes) + instrumentor.uninstrument() + + def _test_patched_api_call_with_no_access_key(self): + # Create mocks + mock_tracer = MagicMock() + original_func: MagicMock = MagicMock(return_value={"ResponseMetadata": {"RequestId": "12345"}}) + instance: MagicMock = MagicMock() + span: MagicMock = MagicMock() + args = ("operation_name",) + kwargs = {} + initial_attributes = {} + mock_extension = _get_mock_extension() + mock_call_context = _get_mock_call_context() + + def mock_start_span(*args, **kwargs): + attributes = kwargs.get("attributes", {}) + initial_attributes.update(attributes) + cm = MagicMock() + cm.__enter__ = MagicMock(return_value=span) + cm.__exit__ = MagicMock(return_value=None) + return cm + + mock_tracer.start_as_current_span.side_effect = mock_start_span + + # Mock credentials + mock_credentials = MagicMock() + mock_credentials.access_key = None + instance._get_credentials.return_value = mock_credentials + + with patch( + "opentelemetry.instrumentation.botocore._determine_call_context", return_value=mock_call_context + ), patch("opentelemetry.instrumentation.botocore._find_extension", return_value=mock_extension), patch( + "opentelemetry.instrumentation.botocore.is_instrumentation_enabled", return_value=True + ), patch( + "amazon.opentelemetry.distro.patches._botocore_patches.get_server_attributes", return_value={} + ), patch( + "opentelemetry.instrumentation.botocore.get_tracer", return_value=mock_tracer + ), patch( + "opentelemetry.instrumentation.botocore.get_event_logger", return_value=MagicMock() + ), patch( + "opentelemetry.instrumentation.botocore.get_meter", return_value=MagicMock() + ): + instrumentor = BotocoreInstrumentor() + instrumentor.instrument() + instrumentor._patched_api_call(original_func, instance, args, kwargs) + + self.assertFalse("aws.auth.account.access_key" in initial_attributes) + self.assertTrue("aws.region" in initial_attributes) + instrumentor.uninstrument() + def _test_patched_gevent_os_ssl_instrumentation(self): # Only ssl and os module should have been patched since the environment variable was set to 'os, ssl' self.assertTrue(gevent.monkey.is_module_patched("ssl"), "gevent ssl module has not been patched") @@ -410,7 +574,7 @@ def _reset_mocks(self): def _do_extract_kinesis_attributes() -> Dict[str, str]: service_name: str = "kinesis" - params: Dict[str, str] = {"StreamName": _STREAM_NAME} + params: Dict[str, str] = {"StreamName": _STREAM_NAME, "StreamARN": _STREAM_ARN} return _do_extract_attributes(service_name, params) @@ -490,6 +654,12 @@ def _do_extract_attributes(service_name: str, params: Dict[str, Any], operation: return attributes +def _do_on_success_dynamodb() -> Dict[str, str]: + service_name: str = "dynamodb" + result: Dict[str, Any] = {"Table": {"TableArn": _TABLE_ARN}} + return _do_on_success(service_name, result) + + def _do_on_success( service_name: str, result: Dict[str, Any], operation: str = None, params: Dict[str, Any] = None ) -> Dict[str, str]: @@ -513,3 +683,33 @@ def set_side_effect(set_key, set_value): extension.on_success(span_mock, result, mock_instrumentor_context) return span_attributes + + +def _get_mock_extension(): + # Mock extension + mock_extension = MagicMock() + mock_extension.should_trace_service_call.return_value = True + mock_extension.tracer_schema_version.return_value = "1.0.0" + mock_extension.event_logger_schema_version.return_value = "1.0.0" + mock_extension.meter_schema_version.return_value = "1.0.0" + mock_extension.should_end_span_on_exit.return_value = True + mock_extension.extract_attributes = lambda x: None + mock_extension.before_service_call = lambda *args, **kwargs: None + mock_extension.after_service_call = lambda *args, **kwargs: None + mock_extension.on_success = lambda *args, **kwargs: None + mock_extension.on_error = lambda *args, **kwargs: None + mock_extension.setup_metrics = lambda meter, metrics: None + return mock_extension + + +def _get_mock_call_context(): + # Mock call context + mock_call_context = MagicMock() + mock_call_context.service = "test-service" + mock_call_context.service_id = "test-service" + mock_call_context.operation = "test-operation" + mock_call_context.region = "us-west-2" + mock_call_context.span_name = "test-span" + mock_call_context.span_kind = "CLIENT" + mock_call_context.endpoint_url = "https://www.awsmocktest.com" + return mock_call_context diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_regional_resource_arn_parser.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_regional_resource_arn_parser.py new file mode 100644 index 000000000..394d58f02 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_regional_resource_arn_parser.py @@ -0,0 +1,99 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from unittest import TestCase + +from amazon.opentelemetry.distro.regional_resource_arn_parser import RegionalResourceArnParser + + +class TestRegionalResourceArnParser(TestCase): + def test_get_account_id(self): + # Test invalid ARN formats + self.validate_get_account_id(None, None) + self.validate_get_account_id("", None) + self.validate_get_account_id(" ", None) + self.validate_get_account_id(":", None) + self.validate_get_account_id("::::::", None) + self.validate_get_account_id("not:an:arn:string", None) + self.validate_get_account_id("arn:aws:ec2:us-west-2:123456", None) + self.validate_get_account_id("arn:aws:ec2:us-west-2:1234567xxxxx", None) + self.validate_get_account_id("arn:aws:ec2:us-west-2:123456789012", None) + + # Test valid ARN formats + self.validate_get_account_id("arn:aws:dynamodb:us-west-2:123456789012:table/test_table", "123456789012") + self.validate_get_account_id("arn:aws:acm:us-east-1:123456789012:certificate:abc-123", "123456789012") + + def test_get_region(self): + # Test invalid ARN formats + self.validate_get_region(None, None) + self.validate_get_region("", None) + self.validate_get_region(" ", None) + self.validate_get_region(":", None) + self.validate_get_region("::::::", None) + self.validate_get_region("not:an:arn:string", None) + self.validate_get_region("arn:aws:ec2:us-west-2:123456", None) + self.validate_get_region("arn:aws:ec2:us-west-2:1234567xxxxx", None) + self.validate_get_region("arn:aws:ec2:us-west-2:123456789012", None) + + # Test valid ARN formats + self.validate_get_region("arn:aws:dynamodb:us-west-2:123456789012:table/test_table", "us-west-2") + self.validate_get_region("arn:aws:acm:us-east-1:123456789012:certificate:abc-123", "us-east-1") + + def test_extract_dynamodb_table_name_from_arn(self): + # Test invalid ARN formats + self.validate_dynamodb_table_name(None, None) + self.validate_dynamodb_table_name("", None) + self.validate_dynamodb_table_name(" ", None) + self.validate_dynamodb_table_name(":", None) + self.validate_dynamodb_table_name("::::::", None) + self.validate_dynamodb_table_name("not:an:arn:string", None) + + # Test valid ARN formats + self.validate_dynamodb_table_name("arn:aws:dynamodb:us-west-2:123456789012:table/test_table", "test_table") + self.validate_dynamodb_table_name( + "arn:aws:dynamodb:us-west-2:123456789012:table/my-table-name", "my-table-name" + ) + + def test_extract_kinesis_stream_name_from_arn(self): + # Test invalid ARN formats + self.validate_kinesis_stream_name(None, None) + self.validate_kinesis_stream_name("", None) + self.validate_kinesis_stream_name(" ", None) + self.validate_kinesis_stream_name(":", None) + self.validate_kinesis_stream_name("::::::", None) + self.validate_kinesis_stream_name("not:an:arn:string", None) + + # Test valid ARN formats + self.validate_kinesis_stream_name("arn:aws:kinesis:us-west-2:123456789012:stream/test_stream", "test_stream") + self.validate_kinesis_stream_name( + "arn:aws:kinesis:us-west-2:123456789012:stream/my-stream-name", "my-stream-name" + ) + + def test_extract_resource_name_from_arn(self): + # Test invalid ARN formats + self.validate_resource_name(None, None) + self.validate_resource_name("", None) + self.validate_resource_name(" ", None) + self.validate_resource_name(":", None) + self.validate_resource_name("::::::", None) + self.validate_resource_name("not:an:arn:string", None) + + # Test valid ARN formats + self.validate_resource_name("arn:aws:dynamodb:us-west-2:123456789012:table/test_table", "table/test_table") + self.validate_resource_name("arn:aws:kinesis:us-west-2:123456789012:stream/test_stream", "stream/test_stream") + self.validate_resource_name("arn:aws:s3:us-west-2:123456789012:my-bucket", "my-bucket") + + def validate_dynamodb_table_name(self, arn, expected_name): + self.assertEqual(RegionalResourceArnParser.extract_dynamodb_table_name_from_arn(arn), expected_name) + + def validate_kinesis_stream_name(self, arn, expected_name): + self.assertEqual(RegionalResourceArnParser.extract_kinesis_stream_name_from_arn(arn), expected_name) + + def validate_resource_name(self, arn, expected_name): + self.assertEqual(RegionalResourceArnParser.extract_resource_name_from_arn(arn), expected_name) + + def validate_get_region(self, arn, expected_region): + self.assertEqual(RegionalResourceArnParser.get_region(arn), expected_region) + + def validate_get_account_id(self, arn, expected_account_id): + self.assertEqual(RegionalResourceArnParser.get_account_id(arn), expected_account_id) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py index 564024a58..702c1a522 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_sqs_url_parsers.py @@ -8,46 +8,83 @@ class TestSqsUrlParser(TestCase): def test_sqs_client_span_basic_urls(self): - self.validate("https://sqs.us-east-1.amazonaws.com/123412341234/Q_Name-5", "Q_Name-5") - self.validate("https://sqs.af-south-1.amazonaws.com/999999999999/-_ThisIsValid", "-_ThisIsValid") - self.validate("http://sqs.eu-west-3.amazonaws.com/000000000000/FirstQueue", "FirstQueue") - self.validate("sqs.sa-east-1.amazonaws.com/123456781234/SecondQueue", "SecondQueue") + self.validate_get_queue_name("https://sqs.us-east-1.amazonaws.com/123412341234/Q_Name-5", "Q_Name-5") + self.validate_get_queue_name("https://sqs.af-south-1.amazonaws.com/999999999999/-_ThisIsValid", "-_ThisIsValid") + self.validate_get_queue_name("http://sqs.eu-west-3.amazonaws.com/000000000000/FirstQueue", "FirstQueue") + self.validate_get_queue_name("sqs.sa-east-1.amazonaws.com/123456781234/SecondQueue", "SecondQueue") def test_sqs_client_span_legacy_format_urls(self): - self.validate("https://ap-northeast-2.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") - self.validate("http://cn-northwest-1.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") - self.validate("http://cn-north-1.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") - self.validate("ap-south-1.queue.amazonaws.com/123412341234/MyLongerQueueNameHere", "MyLongerQueueNameHere") - self.validate("https://queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate_get_queue_name("https://ap-northeast-2.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate_get_queue_name("http://cn-northwest-1.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate_get_queue_name("http://cn-north-1.queue.amazonaws.com/123456789012/MyQueue", "MyQueue") + self.validate_get_queue_name( + "ap-south-1.queue.amazonaws.com/123412341234/MyLongerQueueNameHere", "MyLongerQueueNameHere" + ) + self.validate_get_queue_name("https://queue.amazonaws.com/123456789012/MyQueue", "MyQueue") def test_sqs_client_span_custom_urls(self): - self.validate("http://127.0.0.1:1212/123456789012/MyQueue", "MyQueue") - self.validate("https://127.0.0.1:1212/123412341234/RRR", "RRR") - self.validate("127.0.0.1:1212/123412341234/QQ", "QQ") - self.validate("https://amazon.com/123412341234/BB", "BB") + self.validate_get_queue_name("http://127.0.0.1:1212/123456789012/MyQueue", "MyQueue") + self.validate_get_queue_name("https://127.0.0.1:1212/123412341234/RRR", "RRR") + self.validate_get_queue_name("127.0.0.1:1212/123412341234/QQ", "QQ") + self.validate_get_queue_name("https://amazon.com/123412341234/BB", "BB") def test_sqs_client_span_long_urls(self): queue_name = "a" * 80 - self.validate("http://127.0.0.1:1212/123456789012/" + queue_name, queue_name) + self.validate_get_queue_name("http://127.0.0.1:1212/123456789012/" + queue_name, queue_name) queue_name_too_long = "a" * 81 - self.validate("http://127.0.0.1:1212/123456789012/" + queue_name_too_long, None) + self.validate_get_queue_name("http://127.0.0.1:1212/123456789012/" + queue_name_too_long, None) def test_client_span_sqs_invalid_or_empty_urls(self): - self.validate(None, None) - self.validate("", None) - self.validate(" ", None) - self.validate("/", None) - self.validate("//", None) - self.validate("///", None) - self.validate("//asdf", None) - self.validate("/123412341234/as?df", None) - self.validate("invalidUrl", None) - self.validate("https://www.amazon.com", None) - self.validate("https://sqs.us-east-1.amazonaws.com/123412341234/.", None) - self.validate("https://sqs.us-east-1.amazonaws.com/12/Queue", None) - self.validate("https://sqs.us-east-1.amazonaws.com/A/A", None) - self.validate("https://sqs.us-east-1.amazonaws.com/123412341234/A/ThisShouldNotBeHere", None) - - def validate(self, url, expected_name): + self.validate_get_queue_name(None, None) + self.validate_get_queue_name("", None) + self.validate_get_queue_name(" ", None) + self.validate_get_queue_name("/", None) + self.validate_get_queue_name("//", None) + self.validate_get_queue_name("///", None) + self.validate_get_queue_name("//asdf", None) + self.validate_get_queue_name("/123412341234/as?df", None) + self.validate_get_queue_name("invalidUrl", None) + self.validate_get_queue_name("https://www.amazon.com", None) + self.validate_get_queue_name("https://sqs.us-east-1.amazonaws.com/123412341234/.", None) + self.validate_get_queue_name("https://sqs.us-east-1.amazonaws.com/1234123412xx/Queue", None) + self.validate_get_queue_name("https://sqs.us-east-1.amazonaws.com/A/A", None) + self.validate_get_queue_name("https://sqs.us-east-1.amazonaws.com/123412341234/A/ThisShouldNotBeHere", None) + + def test_get_account_id_from_sqs_url(self): + self.validate_get_account_id(None, None) + self.validate_get_account_id("", None) + self.validate_get_account_id(" ", None) + self.validate_get_account_id("/", None) + self.validate_get_account_id("//", None) + self.validate_get_account_id("///", None) + self.validate_get_account_id("//asdf", None) + self.validate_get_account_id("/123412341234/as?df", None) + self.validate_get_account_id("invalidUrl", None) + self.validate_get_account_id("https://www.amazon.com", None) + self.validate_get_account_id("https://sqs.us-east-1.amazonaws.com/12341234/Queue", "12341234") + self.validate_get_account_id("https://sqs.us-east-1.amazonaws.com/1234123412xx/Queue", None) + self.validate_get_account_id("https://sqs.us-east-1.amazonaws.com/1234123412xx", None) + self.validate_get_account_id("https://sqs.us-east-1.amazonaws.com/123412341234/Q_Namez-5", "123412341234") + + def test_get_region_from_sqs_url(self): + self.validate_get_region(None, None) + self.validate_get_region("", None) + self.validate_get_region(" ", None) + self.validate_get_region("/", None) + self.validate_get_region("//", None) + self.validate_get_region("///", None) + self.validate_get_region("//asdf", None) + self.validate_get_region("/123412341234/as?df", None) + self.validate_get_region("invalidUrl", None) + self.validate_get_region("https://www.amazon.com", None) + self.validate_get_region("https://sqs.us-east-1.amazonaws.com/123412341234/Q_Namez-5", "us-east-1") + + def validate_get_region(self, url, expected_region): + self.assertEqual(SqsUrlParser.get_region(url), expected_region) + + def validate_get_account_id(self, url, expected_account_id): + self.assertEqual(SqsUrlParser.get_account_id(url), expected_account_id) + + def validate_get_queue_name(self, url, expected_name): self.assertEqual(SqsUrlParser.get_queue_name(url), expected_name) diff --git a/contract-tests/images/applications/botocore/botocore_server.py b/contract-tests/images/applications/botocore/botocore_server.py index 80ecbc6fe..36bf87dbb 100644 --- a/contract-tests/images/applications/botocore/botocore_server.py +++ b/contract-tests/images/applications/botocore/botocore_server.py @@ -54,6 +54,8 @@ def do_GET(self): self._handle_stepfunctions_request() if self.in_path("sns"): self._handle_sns_request() + if self.in_path("cross-account"): + self._handle_cross_account_request() self._end_request(self.main_status) @@ -85,6 +87,23 @@ def do_PUT(self): def in_path(self, sub_path: str) -> bool: return sub_path in self.path + def _handle_cross_account_request(self) -> None: + s3_client = boto3.client( + "s3", + endpoint_url=_AWS_SDK_S3_ENDPOINT, + region_name="eu-central-1", + aws_access_key_id="account_b_access_key_id", + aws_secret_access_key="account_b_secret_access_key", + aws_session_token="account_b_token", + ) + if self.in_path("createbucket/account_b"): + set_main_status(200) + s3_client.create_bucket( + Bucket="cross-account-bucket", CreateBucketConfiguration={"LocationConstraint": "eu-central-1"} + ) + else: + set_main_status(404) + def _handle_s3_request(self) -> None: s3_client: BaseClient = boto3.client("s3", endpoint_url=_AWS_SDK_S3_ENDPOINT, region_name=_AWS_REGION) if self.in_path(_ERROR): @@ -151,6 +170,11 @@ def _handle_ddb_request(self) -> None: ], BillingMode="PAY_PER_REQUEST", ) + elif self.in_path("describetable/some-table"): + set_main_status(200) + ddb_client.describe_table( + TableName="put_test_table", + ) elif self.in_path("putitem/putitem-table/key"): set_main_status(200) item: dict = {"id": {"S": "1"}} @@ -213,6 +237,11 @@ def _handle_kinesis_request(self) -> None: elif self.in_path("putrecord/my-stream"): set_main_status(200) kinesis_client.put_record(StreamName="test_stream", Data=b"test", PartitionKey="partition_key") + elif self.in_path("describestream/my-stream"): + set_main_status(200) + kinesis_client.describe_stream( + StreamName="test_stream", StreamARN="arn:aws:kinesis:us-west-2:000000000000:stream/test_stream" + ) else: set_main_status(404) diff --git a/contract-tests/tests/test/amazon/botocore/botocore_test.py b/contract-tests/tests/test/amazon/botocore/botocore_test.py index 549ec3f50..f5b5638ae 100644 --- a/contract-tests/tests/test/amazon/botocore/botocore_test.py +++ b/contract-tests/tests/test/amazon/botocore/botocore_test.py @@ -16,7 +16,10 @@ AWS_LOCAL_OPERATION, AWS_LOCAL_SERVICE, AWS_REMOTE_OPERATION, + AWS_REMOTE_RESOURCE_ACCESS_KEY, + AWS_REMOTE_RESOURCE_ACCOUNT_ID, AWS_REMOTE_RESOURCE_IDENTIFIER, + AWS_REMOTE_RESOURCE_REGION, AWS_REMOTE_RESOURCE_TYPE, AWS_REMOTE_SERVICE, AWS_SPAN_KIND, @@ -50,6 +53,8 @@ _AWS_STATE_MACHINE_ARN: str = "aws.stepfunctions.state_machine.arn" _AWS_ACTIVITY_ARN: str = "aws.stepfunctions.activity.arn" _AWS_SNS_TOPIC_ARN: str = "aws.sns.topic.arn" +_AWS_DYNAMODB_TABLE_ARN: str = "aws.dynamodb.table.arn" +_AWS_KINESIS_STREAM_ARN: str = "aws.kinesis.stream.arn" # pylint: disable=too-many-public-methods,too-many-lines @@ -212,6 +217,29 @@ def test_dynamodb_create_table(self): span_name="DynamoDB.CreateTable", ) + def test_dynamodb_describe_table(self): + self.do_test_requests( + "ddb/describetable/some-table", + "GET", + 200, + 0, + 0, + remote_service="AWS::DynamoDB", + remote_operation="DescribeTable", + remote_resource_type="AWS::DynamoDB::Table", + remote_resource_identifier="put_test_table", + remote_resource_account_id="000000000000", + remote_resource_region="us-west-2", + cloudformation_primary_identifier="put_test_table", + request_specific_attributes={ + SpanAttributes.AWS_DYNAMODB_TABLE_NAMES: ["put_test_table"], + }, + response_specific_attributes={ + _AWS_DYNAMODB_TABLE_ARN: r"arn:aws:dynamodb:us-west-2:000000000000:table/put_test_table", + }, + span_name="DynamoDB.DescribeTable", + ) + def test_dynamodb_put_item(self): self.do_test_requests( "ddb/putitem/putitem-table/key", @@ -379,6 +407,26 @@ def test_kinesis_put_record(self): span_name="Kinesis.PutRecord", ) + def test_kinesis_describe_stream(self): + self.do_test_requests( + "kinesis/describestream/my-stream", + "GET", + 200, + 0, + 0, + remote_service="AWS::Kinesis", + remote_operation="DescribeStream", + remote_resource_type="AWS::Kinesis::Stream", + remote_resource_identifier="test_stream", + cloudformation_primary_identifier="test_stream", + remote_resource_account_id="000000000000", + remote_resource_region="us-west-2", + request_specific_attributes={ + _AWS_KINESIS_STREAM_NAME: "test_stream", + }, + span_name="Kinesis.DescribeStream", + ) + def test_kinesis_error(self): self.do_test_requests( "kinesis/error", @@ -878,6 +926,26 @@ def test_stepfunctions_fault(self): span_name="SFN.ListStateMachineVersions", ) + def test_cross_account(self): + self.do_test_requests( + "cross-account/createbucket/account_b", + "GET", + 200, + 0, + 0, + remote_service="AWS::S3", + remote_operation="CreateBucket", + remote_resource_type="AWS::S3::Bucket", + remote_resource_identifier="cross-account-bucket", + cloudformation_primary_identifier="cross-account-bucket", + request_specific_attributes={ + SpanAttributes.AWS_S3_BUCKET: "cross-account-bucket", + }, + remote_resource_access_key="account_b_access_key_id", + remote_resource_region="eu-central-1", + span_name="S3.CreateBucket", + ) + # TODO: Add contract test for lambda event source mapping resource @override @@ -897,6 +965,9 @@ def _assert_aws_span_attributes(self, resource_scope_spans: List[ResourceScopeSp kwargs.get("remote_resource_type", "None"), kwargs.get("remote_resource_identifier", "None"), kwargs.get("cloudformation_primary_identifier", "None"), + kwargs.get("remote_resource_account_id", "None"), + kwargs.get("remote_resource_access_key", "None"), + kwargs.get("remote_resource_region", "None"), ) def _assert_aws_attributes( @@ -908,6 +979,9 @@ def _assert_aws_attributes( remote_resource_type: str, remote_resource_identifier: str, cloudformation_primary_identifier: str, + remote_resource_account_id: str, + remote_resource_access_key: str, + remote_resource_region: str, ) -> None: attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(attributes_list) self._assert_str_attribute(attributes_dict, AWS_LOCAL_SERVICE, self.get_application_otel_service_name()) @@ -934,8 +1008,16 @@ def _assert_aws_attributes( self._assert_str_attribute( attributes_dict, AWS_CLOUDFORMATION_PRIMARY_IDENTIFIER, cloudformation_primary_identifier ) - # See comment above AWS_LOCAL_OPERATION - self._assert_str_attribute(attributes_dict, AWS_SPAN_KIND, span_kind) + if remote_resource_account_id != "None": + assert remote_resource_identifier != "None" + self._assert_str_attribute(attributes_dict, AWS_REMOTE_RESOURCE_ACCOUNT_ID, remote_resource_account_id) + self.assertIsNone(attributes_dict.get(AWS_REMOTE_RESOURCE_ACCESS_KEY)) + if remote_resource_access_key != "None": + assert remote_resource_identifier != "None" + self._assert_str_attribute(attributes_dict, AWS_REMOTE_RESOURCE_ACCESS_KEY, remote_resource_access_key) + self.assertIsNone(attributes_dict.get(AWS_REMOTE_RESOURCE_ACCOUNT_ID)) + if remote_resource_region != "None": + self._assert_str_attribute(attributes_dict, AWS_REMOTE_RESOURCE_REGION, remote_resource_region) @override def _assert_semantic_conventions_span_attributes( @@ -999,6 +1081,7 @@ def _assert_metric_attributes( expected_sum: int, **kwargs, ) -> None: + # pylint: disable=too-many-locals target_metrics: List[Metric] = [] for resource_scope_metric in resource_scope_metrics: if resource_scope_metric.metric.name.lower() == metric_name.lower(): @@ -1023,6 +1106,9 @@ def _assert_metric_attributes( self._assert_str_attribute(attribute_dict, AWS_SPAN_KIND, "CLIENT") remote_resource_type = kwargs.get("remote_resource_type", "None") remote_resource_identifier = kwargs.get("remote_resource_identifier", "None") + remote_resource_account_id = kwargs.get("remote_resource_account_id", "None") + remote_resource_access_key = kwargs.get("remote_resource_access_key", "None") + remote_resource_region = kwargs.get("remote_resource_region", "None") if remote_resource_type != "None": self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_TYPE, remote_resource_type) if remote_resource_identifier != "None": @@ -1030,6 +1116,16 @@ def _assert_metric_attributes( self._assert_match_attribute(attribute_dict, AWS_REMOTE_RESOURCE_IDENTIFIER, remote_resource_identifier) else: self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_IDENTIFIER, remote_resource_identifier) + if remote_resource_account_id != "None": + assert remote_resource_identifier != "None" + self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_ACCOUNT_ID, remote_resource_account_id) + self.assertIsNone(attribute_dict.get(AWS_REMOTE_RESOURCE_ACCESS_KEY)) + if remote_resource_access_key != "None": + assert remote_resource_identifier != "None" + self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_ACCESS_KEY, remote_resource_access_key) + self.assertIsNone(attribute_dict.get(AWS_REMOTE_RESOURCE_ACCOUNT_ID)) + if remote_resource_region != "None": + self._assert_str_attribute(attribute_dict, AWS_REMOTE_RESOURCE_REGION, remote_resource_region) self.check_sum(metric_name, dependency_dp.sum, expected_sum) attribute_dict: Dict[str, AnyValue] = self._get_attributes_dict(service_dp.attributes) diff --git a/contract-tests/tests/test/amazon/utils/application_signals_constants.py b/contract-tests/tests/test/amazon/utils/application_signals_constants.py index e77c7c4ce..c05c40f99 100644 --- a/contract-tests/tests/test/amazon/utils/application_signals_constants.py +++ b/contract-tests/tests/test/amazon/utils/application_signals_constants.py @@ -22,6 +22,9 @@ AWS_REMOTE_DB_USER: str = "aws.remote.db.user" AWS_REMOTE_SERVICE: str = "aws.remote.service" AWS_REMOTE_OPERATION: str = "aws.remote.operation" +AWS_REMOTE_RESOURCE_ACCESS_KEY: str = "aws.remote.resource.account.access_key" +AWS_REMOTE_RESOURCE_ACCOUNT_ID: str = "aws.remote.resource.account.id" +AWS_REMOTE_RESOURCE_REGION: str = "aws.remote.resource.region" AWS_REMOTE_RESOURCE_TYPE: str = "aws.remote.resource.type" AWS_REMOTE_RESOURCE_IDENTIFIER: str = "aws.remote.resource.identifier" AWS_SPAN_KIND: str = "aws.span.kind"