Skip to content

Commit 1b18ed1

Browse files
update url & arn parser; update unit test after version upgrade
1 parent a23151a commit 1b18ed1

File tree

8 files changed

+266
-183
lines changed

8 files changed

+266
-183
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -417,16 +417,20 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
417417
remote_resource_identifier = _escape_delimiters(span.attributes.get(_AWS_TABLE_NAMES)[0])
418418
elif is_key_present(span, AWS_DYNAMODB_TABLE_ARN):
419419
remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table"
420-
remote_resource_identifier = (
421-
_escape_delimiters(span.attributes.get(AWS_DYNAMODB_TABLE_ARN)).split(":")[-1].replace("table/", "")
420+
remote_resource_identifier = _escape_delimiters(
421+
RegionalResourceArnParser.extract_dynamodb_table_name_from_arn(
422+
span.attributes.get(AWS_DYNAMODB_TABLE_ARN)
423+
)
422424
)
423425
elif is_key_present(span, AWS_KINESIS_STREAM_NAME):
424426
remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream"
425427
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_KINESIS_STREAM_NAME))
426428
elif is_key_present(span, AWS_KINESIS_STREAM_ARN):
427429
remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream"
428-
remote_resource_identifier = (
429-
_escape_delimiters(span.attributes.get(AWS_KINESIS_STREAM_ARN)).split(":")[-1].replace("stream/", "")
430+
remote_resource_identifier = _escape_delimiters(
431+
RegionalResourceArnParser.extract_kinesis_stream_name_from_arn(
432+
span.attributes.get(AWS_KINESIS_STREAM_ARN)
433+
)
430434
)
431435
elif is_key_present(span, _AWS_BUCKET_NAME):
432436
remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket"
@@ -464,27 +468,35 @@ def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttri
464468
remote_resource_identifier = _escape_delimiters(span.attributes.get(GEN_AI_REQUEST_MODEL))
465469
elif is_key_present(span, AWS_SECRETSMANAGER_SECRET_ARN):
466470
remote_resource_type = _NORMALIZED_SECRETSMANAGER_SERVICE_NAME + "::Secret"
467-
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_SECRETSMANAGER_SECRET_ARN)).split(
468-
":"
469-
)[-1]
471+
remote_resource_identifier = _escape_delimiters(
472+
RegionalResourceArnParser.extract_resource_name_from_arn(
473+
span.attributes.get(AWS_SECRETSMANAGER_SECRET_ARN)
474+
)
475+
)
470476
cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_SECRETSMANAGER_SECRET_ARN))
471477
elif is_key_present(span, AWS_SNS_TOPIC_ARN):
472478
remote_resource_type = _NORMALIZED_SNS_SERVICE_NAME + "::Topic"
473-
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_SNS_TOPIC_ARN)).split(":")[-1]
479+
remote_resource_identifier = _escape_delimiters(
480+
RegionalResourceArnParser.extract_resource_name_from_arn(span.attributes.get(AWS_SNS_TOPIC_ARN))
481+
)
474482
cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_SNS_TOPIC_ARN))
475483
elif is_key_present(span, AWS_STEPFUNCTIONS_STATEMACHINE_ARN):
476484
remote_resource_type = _NORMALIZED_STEPFUNCTIONS_SERVICE_NAME + "::StateMachine"
477485
remote_resource_identifier = _escape_delimiters(
478-
span.attributes.get(AWS_STEPFUNCTIONS_STATEMACHINE_ARN)
479-
).split(":")[-1]
486+
RegionalResourceArnParser.extract_resource_name_from_arn(
487+
span.attributes.get(AWS_STEPFUNCTIONS_STATEMACHINE_ARN)
488+
)
489+
)
480490
cloudformation_primary_identifier = _escape_delimiters(
481491
span.attributes.get(AWS_STEPFUNCTIONS_STATEMACHINE_ARN)
482492
)
483493
elif is_key_present(span, AWS_STEPFUNCTIONS_ACTIVITY_ARN):
484494
remote_resource_type = _NORMALIZED_STEPFUNCTIONS_SERVICE_NAME + "::Activity"
485-
remote_resource_identifier = _escape_delimiters(span.attributes.get(AWS_STEPFUNCTIONS_ACTIVITY_ARN)).split(
486-
":"
487-
)[-1]
495+
remote_resource_identifier = _escape_delimiters(
496+
RegionalResourceArnParser.extract_resource_name_from_arn(
497+
span.attributes.get(AWS_STEPFUNCTIONS_ACTIVITY_ARN)
498+
)
499+
)
488500
cloudformation_primary_identifier = _escape_delimiters(span.attributes.get(AWS_STEPFUNCTIONS_ACTIVITY_ARN))
489501
elif is_key_present(span, AWS_LAMBDA_FUNCTION_NAME):
490502
# For non-Invoke Lambda operations, treat Lambda as a resource,

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,15 @@ def get_aws_region() -> Optional[str]:
7575
"""
7676
botocore_session = get_aws_session()
7777
return botocore_session.get_config_variable("region") if botocore_session else None
78+
79+
80+
def is_account_id(input_str: str) -> bool:
    """Return True when ``input_str`` is a non-empty string of ASCII digits.

    The length is intentionally not constrained here; callers only require
    that the account id consists of digits.

    A strict digit check is used instead of ``int(input_str)`` because
    ``int()`` also accepts signs ("-12", "+12"), surrounding whitespace
    (" 12 "), underscore separators ("1_000") and non-ASCII digit characters,
    none of which can appear in an account id.

    :param input_str: candidate account id; ``None`` and non-string values
        are rejected rather than raising.
    :return: True if every character is one of 0-9 and the string is non-empty.
    """
    if not isinstance(input_str, str):
        return False

    # isascii() rules out Unicode digits (e.g. full-width or superscript
    # forms) that str.isdigit() alone would accept.
    return input_str.isascii() and input_str.isdigit()

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/regional_resource_arn_parser.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# SPDX-License-Identifier: Apache-2.0
33
from typing import Optional
44

5+
from amazon.opentelemetry.distro._utils import is_account_id
6+
57

68
class RegionalResourceArnParser:
79
@staticmethod
@@ -16,6 +18,28 @@ def get_region(arn: str) -> Optional[str]:
1618
return str(arn).split(":")[3]
1719
return None
1820

21+
@staticmethod
def extract_dynamodb_table_name_from_arn(arn: str) -> Optional[str]:
    """Return the resource segment of a DynamoDB table ARN with "table/" removed.

    Delegates to ``extract_resource_name_from_arn`` for the segment; returns
    None when that call yields None or an empty string.
    """
    name = RegionalResourceArnParser.extract_resource_name_from_arn(arn)
    if not name:
        return None
    return name.replace("table/", "")
27+
28+
@staticmethod
def extract_kinesis_stream_name_from_arn(arn: str) -> Optional[str]:
    """Return the resource segment of a Kinesis stream ARN with "stream/" removed.

    Delegates to ``extract_resource_name_from_arn`` for the segment; returns
    None when that call yields None or an empty string.
    """
    name = RegionalResourceArnParser.extract_resource_name_from_arn(arn)
    if not name:
        return None
    return name.replace("stream/", "")
34+
35+
@staticmethod
def extract_resource_name_from_arn(arn: str) -> Optional[str]:
    """Return the last colon-delimited segment of ``arn``.

    :param arn: candidate ARN string.
    :return: the text after the final ":" when ``_is_arn`` accepts the value,
        otherwise None.
    """
    if not _is_arn(arn):
        return None
    # Split str(arn), as get_region and _is_arn do, so the value that was
    # validated is the same value that gets parsed.
    return str(arn).split(":")[-1]
42+
1943

2044
def _is_arn(arn: str) -> bool:
2145
# Check if arn follows the format:
@@ -28,18 +52,4 @@ def _is_arn(arn: str) -> bool:
2852
return False
2953

3054
arn_parts = str(arn).split(":")
31-
return len(arn_parts) >= 6 and _is_account_id(arn_parts[4])
32-
33-
34-
def _is_account_id(input: str) -> bool:
35-
if input is None or len(input) != 12:
36-
return False
37-
38-
if not _check_digits(input):
39-
return False
40-
41-
return True
42-
43-
44-
def _check_digits(string: str) -> bool:
45-
return string.isdigit()
55+
return len(arn_parts) >= 6 and is_account_id(arn_parts[4])

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py

Lines changed: 30 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
# SPDX-License-Identifier: Apache-2.0
3-
from typing import List, Optional
3+
from typing import List, Optional, Tuple
4+
5+
from amazon.opentelemetry.distro._utils import is_account_id
46

57
_HTTP_SCHEMA: str = "http://"
68
_HTTPS_SCHEMA: str = "https://"
@@ -12,14 +14,14 @@ def get_queue_name(url: str) -> Optional[str]:
1214
"""
1315
Best-effort logic to extract queue name from an HTTP url. This method should only be used with a string that is,
1416
with reasonably high confidence, an SQS queue URL. Handles new/legacy/some custom URLs. Essentially, we require
15-
that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be a
16-
12-digit account id, and the third part should be a valid queue name, per SQS naming conventions.
17+
that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be an
18+
account id consisting of digits, and the third part should be a valid queue name, per SQS naming conventions.
1719
"""
1820
if url is None:
1921
return None
20-
url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
21-
split_url: List[Optional[str]] = url.split("/")
22-
if len(split_url) == 3 and _is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]):
22+
urlWithoutProtocol = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
23+
split_url: List[Optional[str]] = urlWithoutProtocol.split("/")
24+
if len(split_url) == 3 and is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]):
2325
return split_url[2]
2426
return None
2527

@@ -28,56 +30,38 @@ def get_account_id(url: str) -> Optional[str]:
2830
"""
2931
Extracts the account ID from an SQS URL.
3032
"""
31-
if url is None:
32-
return None
33-
url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
34-
split_url: List[Optional[str]] = url.split("/")
35-
if _is_valid_sqs_url(url):
36-
return split_url[1]
37-
return None
33+
return SqsUrlParser.parse_url(url)[1]
3834

3935
@staticmethod
4036
def get_region(url: str) -> Optional[str]:
4137
"""
4238
Extracts the region from an SQS URL.
4339
"""
44-
if url is None:
45-
return None
46-
url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
47-
split_url: List[Optional[str]] = url.split("/")
48-
if _is_valid_sqs_url(url):
49-
domain: str = split_url[0]
50-
domain_parts: List[str] = domain.split(".")
51-
if len(domain_parts) == 4:
52-
return domain_parts[1]
53-
return None
54-
55-
56-
def _is_valid_sqs_url(url: str) -> bool:
57-
"""
58-
Checks if the URL is a valid SQS URL.
59-
"""
60-
if url is None:
61-
return False
62-
split_url: List[str] = url.split("/")
63-
return (
64-
len(split_url) == 3
65-
and split_url[0].lower().startswith("sqs")
66-
and _is_account_id(split_url[1])
67-
and _is_valid_queue_name(split_url[2])
68-
)
40+
return SqsUrlParser.parse_url(url)[2]
6941

42+
@staticmethod
43+
def parse_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
44+
"""
45+
Parses an SQS URL and extracts its components.
46+
URL Format: https://sqs.<region>.amazonaws.com/<accountId>/<queueName>
47+
"""
48+
if url is None:
49+
return None, None, None
7050

71-
def _is_account_id(input_str: str) -> bool:
72-
if input_str is None or len(input_str) != 12:
73-
return False
51+
urlWithoutProtocol = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
52+
split_url: List[Optional[str]] = urlWithoutProtocol.split("/")
53+
if (
54+
len(split_url) != 3
55+
or not is_account_id(split_url[1])
56+
or not _is_valid_queue_name(split_url[2])
57+
or not split_url[0].lower().startswith("sqs")
58+
):
59+
return None, None, None
7460

75-
try:
76-
int(input_str)
77-
except ValueError:
78-
return False
61+
domain: str = split_url[0]
62+
domain_parts: List[str] = domain.split(".")
7963

80-
return True
64+
return split_url[2], split_url[1], domain_parts[1] if len(domain_parts) == 4 else None
8165

8266

8367
def _is_valid_queue_name(input_str: str) -> bool:

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_aws_metric_attribute_generator.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,21 +1527,14 @@ def test_sdk_client_span_with_remote_resource_attributes(self):
15271527
keys,
15281528
values,
15291529
)
1530-
self._validate_remote_resource_attributes(
1531-
"AWS::StepFunctions::StateMachine",
1532-
"invalid_arn",
1533-
None,
1534-
_AWS_REMOTE_RESOURCE_REGION,
1535-
None,
1536-
_AWS_REMOTE_RESOURCE_ACCESS_KEY,
1537-
)
1530+
self._validate_remote_resource_attributes(None, None, None)
15381531
self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None])
15391532

15401533
# Invalid arn and no account access key
15411534
self._mock_attribute(
15421535
[AWS_STEPFUNCTIONS_STATEMACHINE_ARN, SpanAttributes.RPC_SYSTEM], ["invalid_arn", "aws-api"]
15431536
)
1544-
self._validate_remote_resource_attributes("AWS::StepFunctions::StateMachine", "invalid_arn")
1537+
self._validate_remote_resource_attributes(None, None, None)
15451538
self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None])
15461539

15471540
# Both account access key and account id are not available
@@ -1571,11 +1564,7 @@ def test_sdk_client_span_with_remote_resource_attributes(self):
15711564
[AWS_STEPFUNCTIONS_STATEMACHINE_ARN, SpanAttributes.RPC_SYSTEM],
15721565
["arn:aws:states:us-east-1:invalid_account_id:stateMachine:testStateMachine", "aws-api"],
15731566
)
1574-
self._validate_remote_resource_attributes(
1575-
"AWS::StepFunctions::StateMachine",
1576-
"testStateMachine",
1577-
"arn:aws:states:us-east-1:invalid_account_id:stateMachine:testStateMachine",
1578-
)
1567+
self._validate_remote_resource_attributes(None, None, None)
15791568
self._mock_attribute([AWS_STEPFUNCTIONS_STATEMACHINE_ARN], [None])
15801569

15811570
# Arn with invalid region

0 commit comments

Comments
 (0)