Skip to content

Commit 26e62a6

Browse files
authored
Metric Schema changes (#150)
In this commit, we are removing RemoteTarget and replacing it with RemoteResourceIdentifier and RemoteResourceType. Further, we are formatting RemoteService and the content of the RemoteResource attributes so that they align with AWS Cloud Control resource names. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent d7052e0 commit 26e62a6

File tree

7 files changed

+229
-324
lines changed

7 files changed

+229
-324
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_attribute_keys.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
AWS_LOCAL_OPERATION: str = "aws.local.operation"
77
AWS_REMOTE_SERVICE: str = "aws.remote.service"
88
AWS_REMOTE_OPERATION: str = "aws.remote.operation"
9-
AWS_REMOTE_TARGET: str = "aws.remote.target"
9+
AWS_REMOTE_RESOURCE_TYPE: str = "aws.remote.resource.type"
10+
AWS_REMOTE_RESOURCE_IDENTIFIER: str = "aws.remote.resource.identifier"
1011
AWS_SDK_DESCENDANT: str = "aws.sdk.descendant"
1112
AWS_CONSUMER_PARENT_SPAN_KIND: str = "aws.consumer.parent.span.kind"
1213

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
AWS_QUEUE_NAME,
1212
AWS_QUEUE_URL,
1313
AWS_REMOTE_OPERATION,
14+
AWS_REMOTE_RESOURCE_IDENTIFIER,
15+
AWS_REMOTE_RESOURCE_TYPE,
1416
AWS_REMOTE_SERVICE,
15-
AWS_REMOTE_TARGET,
1617
AWS_SPAN_KIND,
1718
AWS_STREAM_NAME,
1819
)
@@ -65,6 +66,12 @@
6566
_AWS_TABLE_NAMES: str = SpanAttributes.AWS_DYNAMODB_TABLE_NAMES
6667
_AWS_BUCKET_NAME: str = SpanAttributes.AWS_S3_BUCKET
6768

69+
# Normalized remote service names for supported AWS services
70+
_NORMALIZED_DYNAMO_DB_SERVICE_NAME: str = "AWS::DynamoDB"
71+
_NORMALIZED_KINESIS_SERVICE_NAME: str = "AWS::Kinesis"
72+
_NORMALIZED_S3_SERVICE_NAME: str = "AWS::S3"
73+
_NORMALIZED_SQS_SERVICE_NAME: str = "AWS::SQS"
74+
6875
# Special DEPENDENCY attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
6976
_GRAPHQL: str = "graphql"
7077

@@ -110,7 +117,7 @@ def _generate_dependency_metric_attributes(span: ReadableSpan, resource: Resourc
110117
_set_service(resource, span, attributes)
111118
_set_egress_operation(span, attributes)
112119
_set_remote_service_and_operation(span, attributes)
113-
_set_remote_target(span, attributes)
120+
_set_remote_type_and_identifier(span, attributes)
114121
_set_span_kind_for_dependency(span, attributes)
115122
return attributes
116123

@@ -198,7 +205,7 @@ def _set_remote_service_and_operation(span: ReadableSpan, attributes: BoundedAtt
198205
remote_service = _get_remote_service(span, AWS_REMOTE_SERVICE)
199206
remote_operation = _get_remote_operation(span, AWS_REMOTE_OPERATION)
200207
elif is_key_present(span, _RPC_SERVICE) or is_key_present(span, _RPC_METHOD):
201-
remote_service = _normalize_service_name(span, _get_remote_service(span, _RPC_SERVICE))
208+
remote_service = _normalize_remote_service_name(span, _get_remote_service(span, _RPC_SERVICE))
202209
remote_operation = _get_remote_operation(span, _RPC_METHOD)
203210
elif is_key_present(span, _DB_SYSTEM) or is_key_present(span, _DB_OPERATION) or is_key_present(span, _DB_STATEMENT):
204211
remote_service = _get_remote_service(span, _DB_SYSTEM)
@@ -268,10 +275,14 @@ def _get_db_statement_remote_operation(span: ReadableSpan, statement_key: str) -
268275
return remote_operation
269276

270277

271-
def _normalize_service_name(span: ReadableSpan, service_name: str) -> str:
278+
def _normalize_remote_service_name(span: ReadableSpan, service_name: str) -> str:
279+
"""
280+
If the span is an AWS SDK span, normalize the name to align with <a
281+
href="https://docs.aws.amazon.com/cloudcontrolapi/latest/userguide/supported-resources.html">AWS Cloud Control
282+
resource format</a> as much as possible. Long term, we would like to normalize service name in the upstream.
283+
"""
272284
if is_aws_sdk_span(span):
273-
return "AWS.SDK." + service_name
274-
285+
return "AWS::" + service_name
275286
return service_name
276287

277288

@@ -320,38 +331,39 @@ def _generate_remote_operation(span: ReadableSpan) -> str:
320331
return remote_operation
321332

322333

323-
def _set_remote_target(span: ReadableSpan, attributes: BoundedAttributes) -> None:
324-
remote_target: Optional[str] = _get_remote_target(span)
325-
if remote_target is not None:
326-
attributes[AWS_REMOTE_TARGET] = remote_target
327-
328-
329-
def _get_remote_target(span: ReadableSpan) -> Optional[str]:
334+
def _set_remote_type_and_identifier(span: ReadableSpan, attributes: BoundedAttributes) -> None:
330335
"""
331-
RemoteTarget attribute AWS_REMOTE_TARGET is used to store the resource
332-
name of the remote invokes, such as S3 bucket name, mysql table name, etc.
333-
TODO: currently only support AWS resource name, will be extended to support
334-
the general remote targets, such as ActiveMQ name, etc.
335-
"""
336-
if is_key_present(span, _AWS_BUCKET_NAME):
337-
return "::s3:::" + span.attributes.get(_AWS_BUCKET_NAME)
338-
339-
if is_key_present(span, AWS_QUEUE_URL):
340-
arn = SqsUrlParser.get_sqs_remote_target(span.attributes.get(AWS_QUEUE_URL))
341-
if arn:
342-
return arn
343-
344-
if is_key_present(span, AWS_QUEUE_NAME):
345-
return "::sqs:::" + span.attributes.get(AWS_QUEUE_NAME)
336+
Remote resource attributes AWS_REMOTE_RESOURCE_TYPE and
337+
AWS_REMOTE_RESOURCE_IDENTIFIER are used to store information about the resource associated with
338+
the remote invocation, such as S3 bucket name, etc. We should only ever set both type and identifier or neither.
346339
347-
if is_key_present(span, AWS_STREAM_NAME):
348-
return "::kinesis:::stream/" + span.attributes.get(AWS_STREAM_NAME)
340+
AWS resources type and identifier adhere to <a
341+
href="https://docs.aws.amazon.com/cloudcontrolapi/latest/userguide/supported-resources.html">AWS Cloud Control
342+
resource format</a>.
343+
"""
344+
remote_resource_type: Optional[str] = None
345+
remote_resource_identifier: Optional[str] = None
349346

350347
# Only extract the table name when _AWS_TABLE_NAMES has size equal to one
351348
if is_key_present(span, _AWS_TABLE_NAMES) and len(span.attributes.get(_AWS_TABLE_NAMES)) == 1:
352-
return "::dynamodb:::table/" + span.attributes.get(_AWS_TABLE_NAMES)[0]
353-
354-
return None
349+
remote_resource_type = _NORMALIZED_DYNAMO_DB_SERVICE_NAME + "::Table"
350+
remote_resource_identifier = span.attributes.get(_AWS_TABLE_NAMES)[0]
351+
elif is_key_present(span, AWS_STREAM_NAME):
352+
remote_resource_type = _NORMALIZED_KINESIS_SERVICE_NAME + "::Stream"
353+
remote_resource_identifier = span.attributes.get(AWS_STREAM_NAME)
354+
elif is_key_present(span, _AWS_BUCKET_NAME):
355+
remote_resource_type = _NORMALIZED_S3_SERVICE_NAME + "::Bucket"
356+
remote_resource_identifier = span.attributes.get(_AWS_BUCKET_NAME)
357+
elif is_key_present(span, AWS_QUEUE_NAME):
358+
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
359+
remote_resource_identifier = span.attributes.get(AWS_QUEUE_NAME)
360+
elif is_key_present(span, AWS_QUEUE_URL):
361+
remote_resource_type = _NORMALIZED_SQS_SERVICE_NAME + "::Queue"
362+
remote_resource_identifier = SqsUrlParser.get_queue_name(span.attributes.get(AWS_QUEUE_URL))
363+
364+
if remote_resource_type is not None and remote_resource_identifier is not None:
365+
attributes[AWS_REMOTE_RESOURCE_TYPE] = remote_resource_type
366+
attributes[AWS_REMOTE_RESOURCE_IDENTIFIER] = remote_resource_identifier
355367

356368

357369
def _set_span_kind_for_dependency(span: ReadableSpan, attributes: BoundedAttributes) -> None:

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/sqs_url_parser.py

Lines changed: 20 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -2,106 +2,30 @@
22
# SPDX-License-Identifier: Apache-2.0
33
from typing import List, Optional
44

5-
_ARN_DELIMETER: str = ":"
65
_HTTP_SCHEMA: str = "http://"
76
_HTTPS_SCHEMA: str = "https://"
87

98

109
class SqsUrlParser:
1110
@staticmethod
12-
def get_sqs_remote_target(sqs_url: str) -> Optional[str]:
13-
sqs_url: str = _strip_schema_from_url(sqs_url)
14-
15-
if not _is_sqs_url(sqs_url) and not _is_legacy_sqs_url(sqs_url) and not _is_custom_url(sqs_url):
11+
def get_queue_name(url: str) -> Optional[str]:
12+
"""
13+
Best-effort logic to extract queue name from an HTTP url. This method should only be used with a string that is,
14+
with reasonably high confidence, an SQS queue URL. Handles new/legacy/some custom URLs. Essentially, we require
15+
that the URL should have exactly three parts, delimited by /'s (excluding schema), the second part should be a
16+
12-digit account id, and the third part should be a valid queue name, per SQS naming conventions.
17+
"""
18+
if url is None:
1619
return None
17-
18-
region: str = _get_region(sqs_url)
19-
account_id: str = _get_account_id(sqs_url)
20-
partition: str = _get_partition(sqs_url)
21-
queue_name: str = _get_queue_name(sqs_url)
22-
23-
remote_target: List[Optional[str]] = []
24-
25-
if all((region, account_id, partition, queue_name)):
26-
remote_target.append("arn")
27-
28-
remote_target.extend(
29-
[
30-
_ARN_DELIMETER,
31-
_null_to_empty(partition),
32-
_ARN_DELIMETER,
33-
"sqs",
34-
_ARN_DELIMETER,
35-
_null_to_empty(region),
36-
_ARN_DELIMETER,
37-
_null_to_empty(account_id),
38-
_ARN_DELIMETER,
39-
queue_name,
40-
]
41-
)
42-
43-
return "".join(remote_target)
44-
45-
46-
def _strip_schema_from_url(url: str) -> str:
47-
return url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
48-
49-
50-
def _get_region(sqs_url: str) -> Optional[str]:
51-
if sqs_url is None:
20+
url = url.replace(_HTTP_SCHEMA, "").replace(_HTTPS_SCHEMA, "")
21+
split_url: List[Optional[str]] = url.split("/")
22+
if len(split_url) == 3 and _is_account_id(split_url[1]) and _is_valid_queue_name(split_url[2]):
23+
return split_url[2]
5224
return None
5325

54-
if sqs_url.startswith("queue.amazonaws.com/"):
55-
return "us-east-1"
56-
57-
if _is_sqs_url(sqs_url):
58-
return _get_region_from_sqs_url(sqs_url)
59-
60-
if _is_legacy_sqs_url(sqs_url):
61-
return _get_region_from_legacy_sqs_url(sqs_url)
62-
63-
return None
64-
65-
66-
def _is_sqs_url(sqs_url: str) -> bool:
67-
split: List[Optional[str]] = sqs_url.split("/")
68-
return (
69-
len(split) == 3
70-
and split[0].startswith("sqs.")
71-
and split[0].endswith(".amazonaws.com")
72-
and _is_account_id(split[1])
73-
and _is_valid_queue_name(split[2])
74-
)
75-
76-
77-
def _is_legacy_sqs_url(sqs_url: str) -> bool:
78-
split: List[Optional[str]] = sqs_url.split("/")
79-
return (
80-
len(split) == 3
81-
and split[0].endswith(".queue.amazonaws.com")
82-
and _is_account_id(split[1])
83-
and _is_valid_queue_name(split[2])
84-
)
85-
86-
87-
def _is_custom_url(sqs_url: str) -> bool:
88-
split: List[Optional[str]] = sqs_url.split("/")
89-
return len(split) == 3 and _is_account_id(split[1]) and _is_valid_queue_name(split[2])
90-
91-
92-
def _is_valid_queue_name(input_str: str) -> bool:
93-
if len(input_str) == 0 or len(input_str) > 80:
94-
return False
95-
96-
for char in input_str:
97-
if char != "_" and char != "-" and not char.isalpha() and not char.isdigit():
98-
return False
99-
100-
return True
101-
10226

10327
def _is_account_id(input_str: str) -> bool:
104-
if len(input_str) != 12:
28+
if input_str is None or len(input_str) != 12:
10529
return False
10630

10731
try:
@@ -112,43 +36,12 @@ def _is_account_id(input_str: str) -> bool:
11236
return True
11337

11438

115-
def _get_region_from_sqs_url(sqs_url: str) -> Optional[str]:
116-
split: List[Optional[str]] = sqs_url.split(".")
117-
return split[1] if len(split) >= 2 else None
118-
119-
120-
def _get_region_from_legacy_sqs_url(sqs_url: str) -> Optional[str]:
121-
split: List[Optional[str]] = sqs_url.split(".")
122-
return split[0]
123-
124-
125-
def _get_account_id(sqs_url: str) -> Optional[str]:
126-
if sqs_url is None:
127-
return None
128-
129-
split: List[Optional[str]] = sqs_url.split("/")
130-
return split[1] if len(split) >= 2 else None
131-
132-
133-
def _get_partition(sqs_url: str) -> Optional[str]:
134-
region: Optional[str] = _get_region(sqs_url)
135-
136-
if region is None:
137-
return None
138-
139-
if region.startswith("us-gov-"):
140-
return "aws-us-gov"
141-
142-
if region.startswith("cn-"):
143-
return "aws-cn"
144-
145-
return "aws"
146-
147-
148-
def _get_queue_name(sqs_url: str) -> Optional[str]:
149-
split: List[Optional[str]] = sqs_url.split("/")
150-
return split[2] if len(split) >= 3 else None
39+
def _is_valid_queue_name(input_str: str) -> bool:
40+
if input_str is None or len(input_str) == 0 or len(input_str) > 80:
41+
return False
15142

43+
for char in input_str:
44+
if char != "_" and char != "-" and not char.isalpha() and not char.isdigit():
45+
return False
15246

153-
def _null_to_empty(input_str: str) -> str:
154-
return input_str if input_str is not None else ""
47+
return True

0 commit comments

Comments
 (0)