Skip to content

Commit 010e7df

Browse files
committed
add logs pipeline
1 parent c34f3b9 commit 010e7df

33 files changed

+1611
-841
lines changed

aws-opentelemetry-distro/pyproject.toml

Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -24,62 +24,62 @@ classifiers = [
2424
]
2525

2626
dependencies = [
27-
"opentelemetry-api == 1.27.0",
28-
"opentelemetry-sdk == 1.27.0",
29-
"opentelemetry-exporter-otlp-proto-grpc == 1.27.0",
30-
"opentelemetry-exporter-otlp-proto-http == 1.27.0",
31-
"opentelemetry-propagator-b3 == 1.27.0",
32-
"opentelemetry-propagator-jaeger == 1.27.0",
33-
"opentelemetry-exporter-otlp-proto-common == 1.27.0",
27+
"opentelemetry-api == 1.33.1",
28+
"opentelemetry-sdk == 1.33.1",
29+
"opentelemetry-exporter-otlp-proto-grpc == 1.33.1",
30+
"opentelemetry-exporter-otlp-proto-http == 1.33.1",
31+
"opentelemetry-propagator-b3 == 1.33.1",
32+
"opentelemetry-propagator-jaeger == 1.33.1",
33+
"opentelemetry-exporter-otlp-proto-common == 1.33.1",
3434
"opentelemetry-sdk-extension-aws == 2.0.2",
3535
"opentelemetry-propagator-aws-xray == 1.0.1",
36-
"opentelemetry-distro == 0.48b0",
37-
"opentelemetry-processor-baggage == 0.48b0",
38-
"opentelemetry-propagator-ot-trace == 0.48b0",
39-
"opentelemetry-instrumentation == 0.48b0",
40-
"opentelemetry-instrumentation-aws-lambda == 0.48b0",
41-
"opentelemetry-instrumentation-aio-pika == 0.48b0",
42-
"opentelemetry-instrumentation-aiohttp-client == 0.48b0",
43-
"opentelemetry-instrumentation-aiopg == 0.48b0",
44-
"opentelemetry-instrumentation-asgi == 0.48b0",
45-
"opentelemetry-instrumentation-asyncpg == 0.48b0",
46-
"opentelemetry-instrumentation-boto == 0.48b0",
47-
"opentelemetry-instrumentation-boto3sqs == 0.48b0",
48-
"opentelemetry-instrumentation-botocore == 0.48b0",
49-
"opentelemetry-instrumentation-celery == 0.48b0",
50-
"opentelemetry-instrumentation-confluent-kafka == 0.48b0",
51-
"opentelemetry-instrumentation-dbapi == 0.48b0",
52-
"opentelemetry-instrumentation-django == 0.48b0",
53-
"opentelemetry-instrumentation-elasticsearch == 0.48b0",
54-
"opentelemetry-instrumentation-falcon == 0.48b0",
55-
"opentelemetry-instrumentation-fastapi == 0.48b0",
56-
"opentelemetry-instrumentation-flask == 0.48b0",
57-
"opentelemetry-instrumentation-grpc == 0.48b0",
58-
"opentelemetry-instrumentation-httpx == 0.48b0",
59-
"opentelemetry-instrumentation-jinja2 == 0.48b0",
60-
"opentelemetry-instrumentation-kafka-python == 0.48b0",
61-
"opentelemetry-instrumentation-logging == 0.48b0",
62-
"opentelemetry-instrumentation-mysql == 0.48b0",
63-
"opentelemetry-instrumentation-mysqlclient == 0.48b0",
64-
"opentelemetry-instrumentation-pika == 0.48b0",
65-
"opentelemetry-instrumentation-psycopg2 == 0.48b0",
66-
"opentelemetry-instrumentation-pymemcache == 0.48b0",
67-
"opentelemetry-instrumentation-pymongo == 0.48b0",
68-
"opentelemetry-instrumentation-pymysql == 0.48b0",
69-
"opentelemetry-instrumentation-pyramid == 0.48b0",
70-
"opentelemetry-instrumentation-redis == 0.48b0",
71-
"opentelemetry-instrumentation-remoulade == 0.48b0",
72-
"opentelemetry-instrumentation-requests == 0.48b0",
73-
"opentelemetry-instrumentation-sqlalchemy == 0.48b0",
74-
"opentelemetry-instrumentation-sqlite3 == 0.48b0",
75-
"opentelemetry-instrumentation-starlette == 0.48b0",
76-
"opentelemetry-instrumentation-system-metrics == 0.48b0",
77-
"opentelemetry-instrumentation-tornado == 0.48b0",
78-
"opentelemetry-instrumentation-tortoiseorm == 0.48b0",
79-
"opentelemetry-instrumentation-urllib == 0.48b0",
80-
"opentelemetry-instrumentation-urllib3 == 0.48b0",
81-
"opentelemetry-instrumentation-wsgi == 0.48b0",
82-
"opentelemetry-instrumentation-cassandra == 0.48b0",
36+
"opentelemetry-distro == 0.54b1",
37+
"opentelemetry-processor-baggage == 0.54b1",
38+
"opentelemetry-propagator-ot-trace == 0.54b1",
39+
"opentelemetry-instrumentation == 0.54b1",
40+
"opentelemetry-instrumentation-aws-lambda == 0.54b1",
41+
"opentelemetry-instrumentation-aio-pika == 0.54b1",
42+
"opentelemetry-instrumentation-aiohttp-client == 0.54b1",
43+
"opentelemetry-instrumentation-aiopg == 0.54b1",
44+
"opentelemetry-instrumentation-asgi == 0.54b1",
45+
"opentelemetry-instrumentation-asyncpg == 0.54b1",
46+
"opentelemetry-instrumentation-boto == 0.54b1",
47+
"opentelemetry-instrumentation-boto3sqs == 0.54b1",
48+
"opentelemetry-instrumentation-botocore == 0.54b1",
49+
"opentelemetry-instrumentation-celery == 0.54b1",
50+
"opentelemetry-instrumentation-confluent-kafka == 0.54b1",
51+
"opentelemetry-instrumentation-dbapi == 0.54b1",
52+
"opentelemetry-instrumentation-django == 0.54b1",
53+
"opentelemetry-instrumentation-elasticsearch == 0.54b1",
54+
"opentelemetry-instrumentation-falcon == 0.54b1",
55+
"opentelemetry-instrumentation-fastapi == 0.54b1",
56+
"opentelemetry-instrumentation-flask == 0.54b1",
57+
"opentelemetry-instrumentation-grpc == 0.54b1",
58+
"opentelemetry-instrumentation-httpx == 0.54b1",
59+
"opentelemetry-instrumentation-jinja2 == 0.54b1",
60+
"opentelemetry-instrumentation-kafka-python == 0.54b1",
61+
"opentelemetry-instrumentation-logging == 0.54b1",
62+
"opentelemetry-instrumentation-mysql == 0.54b1",
63+
"opentelemetry-instrumentation-mysqlclient == 0.54b1",
64+
"opentelemetry-instrumentation-pika == 0.54b1",
65+
"opentelemetry-instrumentation-psycopg2 == 0.54b1",
66+
"opentelemetry-instrumentation-pymemcache == 0.54b1",
67+
"opentelemetry-instrumentation-pymongo == 0.54b1",
68+
"opentelemetry-instrumentation-pymysql == 0.54b1",
69+
"opentelemetry-instrumentation-pyramid == 0.54b1",
70+
"opentelemetry-instrumentation-redis == 0.54b1",
71+
"opentelemetry-instrumentation-remoulade == 0.54b1",
72+
"opentelemetry-instrumentation-requests == 0.54b1",
73+
"opentelemetry-instrumentation-sqlalchemy == 0.54b1",
74+
"opentelemetry-instrumentation-sqlite3 == 0.54b1",
75+
"opentelemetry-instrumentation-starlette == 0.54b1",
76+
"opentelemetry-instrumentation-system-metrics == 0.54b1",
77+
"opentelemetry-instrumentation-tornado == 0.54b1",
78+
"opentelemetry-instrumentation-tortoiseorm == 0.54b1",
79+
"opentelemetry-instrumentation-urllib == 0.54b1",
80+
"opentelemetry-instrumentation-urllib3 == 0.54b1",
81+
"opentelemetry-instrumentation-wsgi == 0.54b1",
82+
"opentelemetry-instrumentation-cassandra == 0.54b1",
8383
]
8484

8585
[project.optional-dependencies]

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
)
3636
from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute
3737
from amazon.opentelemetry.distro._aws_span_processing_util import (
38-
GEN_AI_REQUEST_MODEL,
3938
LOCAL_ROOT,
4039
MAX_KEYWORD_LENGTH,
4140
SQL_KEYWORD_PATTERN,
@@ -60,6 +59,7 @@
6059
from amazon.opentelemetry.distro.sqs_url_parser import SqsUrlParser
6160
from opentelemetry.sdk.resources import Resource
6261
from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan
62+
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_REQUEST_MODEL
6363
from opentelemetry.semconv.trace import SpanAttributes
6464

6565
# Pertinent OTEL attribute keys

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,6 @@
2626
# Max keyword length supported by parsing into remote_operation from DB_STATEMENT
2727
MAX_KEYWORD_LENGTH = 27
2828

29-
# TODO: Use Semantic Conventions once upgrade to 0.47b0
30-
GEN_AI_REQUEST_MODEL: str = "gen_ai.request.model"
31-
GEN_AI_SYSTEM: str = "gen_ai.system"
32-
GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens"
33-
GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature"
34-
GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p"
35-
GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons"
36-
GEN_AI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens"
37-
GEN_AI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens"
38-
3929

4030
# Get dialect keywords retrieved from dialect_keywords.json file.
4131
# Only meant to be invoked by SQL_KEYWORD_PATTERN and unit tests

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_utils.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import os
5-
import sys
5+
from importlib.metadata import PackageNotFoundError, version
66
from logging import Logger, getLogger
77

8-
import pkg_resources
8+
from packaging.requirements import Requirement
99

1010
_logger: Logger = getLogger(__name__)
1111

@@ -14,15 +14,21 @@
1414

1515
def is_installed(req: str) -> bool:
1616
"""Is the given required package installed?"""
17-
18-
if req in sys.modules and sys.modules[req] is not None:
19-
return True
17+
req = Requirement(req)
2018

2119
try:
22-
pkg_resources.get_distribution(req)
23-
except Exception as exc: # pylint: disable=broad-except
20+
dist_version = version(req.name)
21+
except PackageNotFoundError as exc:
2422
_logger.debug("Skipping instrumentation patch: package %s, exception: %s", req, exc)
2523
return False
24+
25+
if not list(req.specifier.filter([dist_version])):
26+
_logger.debug(
27+
"instrumentation for package %s is available but version %s is installed. Skipping.",
28+
req,
29+
dist_version,
30+
)
31+
return False
2632
return True
2733

2834

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,11 @@ def _init_logging(
180180
resource: Resource = None,
181181
):
182182

183-
# Provides a default OTLP log exporter when none is specified.
183+
# Provides a default OTLP log exporter when it's not set.
184184
# This is the behavior for the logs exporters for other languages.
185-
logs_exporter = os.environ.get("OTEL_LOGS_EXPORTER")
185+
logs_exporters = os.environ.get("OTEL_LOGS_EXPORTER")
186186

187-
if not exporters and logs_exporter and (logs_exporter.lower() != "none"):
187+
if not exporters and logs_exporters and logs_exporters.lower() != "none":
188188
exporters = {"otlp": OTLPLogExporter}
189189

190190
provider = LoggerProvider(resource=resource)

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py

Lines changed: 63 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
14
import logging
2-
from typing import Mapping, Optional, Sequence, cast
5+
from typing import List, Mapping, Optional, Sequence, cast
36

47
from amazon.opentelemetry.distro.exporter.otlp.aws.logs.otlp_aws_logs_exporter import OTLPAwsLogExporter
5-
from opentelemetry.context import (
6-
_SUPPRESS_INSTRUMENTATION_KEY,
7-
attach,
8-
detach,
9-
set_value,
10-
)
8+
from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value
119
from opentelemetry.sdk._logs import LogData
1210
from opentelemetry.sdk._logs._internal.export import BatchLogExportStrategy
1311
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
@@ -18,7 +16,7 @@
1816

1917
class AwsBatchLogRecordProcessor(BatchLogRecordProcessor):
2018
_BASE_LOG_BUFFER_BYTE_SIZE = (
21-
2000 # Buffer size in bytes to account for log metadata not included in the body size calculation
19+
1000 # Buffer size in bytes to account for log metadata not included in the body or attribute size calculation
2220
)
2321
_MAX_LOG_REQUEST_BYTE_SIZE = (
2422
1048576 # https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-OTLPEndpoint.html
@@ -66,7 +64,7 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None:
6664

6765
for _ in range(batch_length):
6866
log_data: LogData = self._queue.pop()
69-
log_size = self._BASE_LOG_BUFFER_BYTE_SIZE + self._get_any_value_size(log_data.log_record.body)
67+
log_size = self._estimate_log_size(log_data)
7068

7169
if batch and (batch_data_size + log_size > self._MAX_LOG_REQUEST_BYTE_SIZE):
7270
# if batch_data_size > MAX_LOG_REQUEST_BYTE_SIZE then len(batch) == 1
@@ -88,64 +86,74 @@ def _export(self, batch_strategy: BatchLogExportStrategy) -> None:
8886
self._exporter.set_gen_ai_log_flag()
8987

9088
self._exporter.export(batch)
91-
except Exception as e: # pylint: disable=broad-exception-caught
92-
_logger.exception("Exception while exporting logs: " + str(e))
89+
except Exception as exception: # pylint: disable=broad-exception-caught
90+
_logger.exception("Exception while exporting logs: " + str(exception))
9391
detach(token)
9492

95-
def _get_any_value_size(self, val: AnyValue, depth: int = 3) -> int:
93+
def _estimate_log_size(self, log: LogData, depth: int = 3) -> int:
9694
"""
97-
Only used to indicate whether we should export a batch log size of 1 or not.
98-
Calculates the size in bytes of an AnyValue object.
99-
Will processs complex AnyValue structures up to the specified depth limit.
100-
If the depth limit of the AnyValue structure is exceeded, returns 0.
95+
Estimates the size in bytes of a log by calculating the size of its body and its attributes
96+
and adding a buffer amount to account for other log metadata information.
97+
Will process complex log structures up to the specified depth limit.
98+
If the depth limit of the log structure is exceeded, truncates the calculation
99+
to everything up to that point.
101100
102101
Args:
103-
val: The AnyValue object to calculate size for
102+
log: The Log object to calculate size for
104103
depth: Maximum depth to traverse in nested structures (default: 3)
105104
106105
Returns:
107-
int: Total size of the AnyValue object in bytes
106+
int: The estimated size of the log object in bytes
108107
"""
109-
# Use a stack to prevent excessive recursive calls.
110-
stack = [(val, 0)]
111-
size: int = 0
112-
113-
while stack:
114-
# small optimization. We can stop calculating the size once it reaches the 1 MB limit.
115-
if size >= self._MAX_LOG_REQUEST_BYTE_SIZE:
116-
return size
117-
118-
next_val, current_depth = stack.pop()
119-
120-
if isinstance(next_val, (str, bytes)):
121-
size += len(next_val)
122-
continue
123-
124-
if isinstance(next_val, bool):
125-
size += 4 if next_val else 5
126-
continue
127-
128-
if isinstance(next_val, (float, int)):
129-
size += len(str(next_val))
130-
continue
131-
132-
if current_depth <= depth:
133-
if isinstance(next_val, Sequence):
134-
for content in next_val:
135-
stack.append((cast(AnyValue, content), current_depth + 1))
136-
137-
if isinstance(next_val, Mapping):
138-
for key, content in next_val.items():
139-
size += len(key)
140-
stack.append((content, current_depth + 1))
141-
else:
142-
_logger.debug("Max log depth exceeded. Log data size will not be accurately calculated.")
143-
return 0
108+
109+
# Use a queue to prevent excessive recursive calls.
110+
# We calculate based on the size of the log record body and attributes for the log.
111+
queue: List[tuple[AnyValue, int]] = [(log.log_record.body, 0), (log.log_record.attributes, -1)]
112+
113+
size: int = self._BASE_LOG_BUFFER_BYTE_SIZE
114+
115+
while queue:
116+
new_queue: List[tuple[AnyValue, int]] = []
117+
118+
for data in queue:
119+
# small optimization, can stop calculating the size once it reaches the 1 MB limit.
120+
if size >= self._MAX_LOG_REQUEST_BYTE_SIZE:
121+
return size
122+
123+
next_val, current_depth = data
124+
125+
if isinstance(next_val, (str, bytes)):
126+
size += len(next_val)
127+
continue
128+
129+
if isinstance(next_val, bool):
130+
size += 4 if next_val else 5
131+
continue
132+
133+
if isinstance(next_val, (float, int)):
134+
size += len(str(next_val))
135+
continue
136+
137+
if current_depth <= depth:
138+
if isinstance(next_val, Sequence):
139+
for content in next_val:
140+
new_queue.append((cast(AnyValue, content), current_depth + 1))
141+
142+
if isinstance(next_val, Mapping):
143+
for key, content in next_val.items():
144+
size += len(key)
145+
new_queue.append((content, current_depth + 1))
146+
else:
147+
_logger.debug(
148+
f"Max log depth of {depth} exceeded. Log data size will not be accurately calculated."
149+
)
150+
151+
queue = new_queue
144152

145153
return size
146154

147155
@staticmethod
148-
def _is_gen_ai_log(log_data: LogData) -> bool:
156+
def _is_gen_ai_log(log: LogData) -> bool:
149157
"""
150158
Is the log a Gen AI log event?
151159
"""
@@ -157,4 +165,4 @@ def _is_gen_ai_log(log_data: LogData) -> bool:
157165
"openlit.otel.tracing",
158166
}
159167

160-
return log_data.instrumentation_scope.name in gen_ai_instrumentations
168+
return log.instrumentation_scope.name in gen_ai_instrumentations

0 commit comments

Comments
 (0)