Skip to content

Commit 428c2b6

Browse files
authored
Add live metrics collection of requests/dependencies/exceptions (#34673)
1 parent 36e3080 commit 428c2b6

File tree

13 files changed

+799
-96
lines changed

13 files changed

+799
-96
lines changed

sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
### Features Added
66

7+
- Add live metrics collection of requests/dependencies/exceptions
8+
([#34673](https://github.com/Azure/azure-sdk-for-python/pull/34673))
9+
710
### Breaking Changes
811

912
### Bugs Fixed

sdk/monitor/azure-monitor-opentelemetry-exporter/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Microsoft OpenTelemetry exporter for Azure Monitor
22

3-
The exporter for Azure Monitor allows Python applications to export data from the OpenTelemetry SDK to Azure Monitor. The exporter is intended for users who require advanced configuration or has more complicated telemetry needs that require all of distributed tracing, logging and metrics. If you have simpler configuration requirements, we recommend using the [Azure Monitor OpenTelemetry Distro](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-enable?tabs=python) instead for a simpler one-line setup.
3+
The exporter for Azure Monitor allows Python applications to export data from the OpenTelemetry SDK to Azure Monitor. The exporter is intended for users who require advanced configuration or have more complicated telemetry needs that require all of distributed tracing, logging and metrics. If you have simpler configuration requirements, we recommend using the [Azure Monitor OpenTelemetry Distro](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-enable?tabs=python) instead for a simpler one-line setup.
44

55
Prior to using this SDK, please read and understand [Data Collection Basics](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-overview?tabs=python), especially the section on [telemetry types](https://learn.microsoft.com/azure/azure-monitor/app/opentelemetry-overview?tabs=python#telemetry-types). OpenTelemetry terminology differs from Application Insights terminology so it is important to understand the way the telemetry types map to each other.
66

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_constants.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Copyright (c) Microsoft Corporation. All rights reserved.
22
# Licensed under the MIT License.
3+
from enum import Enum
4+
35
# cSpell:disable
46

57
# (OpenTelemetry metric name, Quickpulse metric name)
@@ -33,4 +35,18 @@
3335
]
3436
)
3537

38+
# Quickpulse intervals
39+
_SHORT_PING_INTERVAL_SECONDS = 5
40+
_POST_INTERVAL_SECONDS = 1
41+
_LONG_PING_INTERVAL_SECONDS = 60
42+
_POST_CANCEL_INTERVAL_SECONDS = 20
43+
44+
# Live metrics data types
45+
class _DocumentIngressDocumentType(Enum):
46+
Request = "Request"
47+
RemoteDependency = "RemoteDependency"
48+
Exception = "Exception"
49+
Event = "Event"
50+
Trace = "Trace"
51+
3652
# cSpell:disable

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_exporter.py

Lines changed: 29 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# Copyright (c) Microsoft Corporation. All rights reserved.
22
# Licensed under the MIT License.
3-
from datetime import datetime, timezone
4-
from enum import Enum
5-
from typing import Any, List, Optional
3+
from typing import Any, Optional
64

75
from opentelemetry.context import (
86
_SUPPRESS_INSTRUMENTATION_KEY,
@@ -13,16 +11,8 @@
1311
from opentelemetry.sdk.metrics import (
1412
Counter,
1513
Histogram,
16-
ObservableCounter,
17-
ObservableGauge,
18-
ObservableUpDownCounter,
19-
UpDownCounter,
20-
)
21-
from opentelemetry.sdk.metrics._internal.point import (
22-
NumberDataPoint,
23-
HistogramDataPoint,
24-
MetricsData,
2514
)
15+
from opentelemetry.sdk.metrics._internal.point import MetricsData
2616
from opentelemetry.sdk.metrics.export import (
2717
AggregationTemporality,
2818
MetricExporter,
@@ -32,31 +22,33 @@
3222
)
3323

3424
from azure.core.exceptions import HttpResponseError
35-
from azure.monitor.opentelemetry.exporter._quickpulse._constants import _QUICKPULSE_METRIC_NAME_MAPPINGS
25+
from azure.monitor.opentelemetry.exporter._quickpulse._constants import (
26+
_LONG_PING_INTERVAL_SECONDS,
27+
_POST_CANCEL_INTERVAL_SECONDS,
28+
_POST_INTERVAL_SECONDS,
29+
)
3630
from azure.monitor.opentelemetry.exporter._quickpulse._generated._client import QuickpulseClient
37-
from azure.monitor.opentelemetry.exporter._quickpulse._generated.models import (
38-
DocumentIngress,
39-
MetricPoint,
40-
MonitoringDataPoint,
31+
from azure.monitor.opentelemetry.exporter._quickpulse._generated.models import MonitoringDataPoint
32+
from azure.monitor.opentelemetry.exporter._quickpulse._state import (
33+
_get_global_quickpulse_state,
34+
_is_ping_state,
35+
_set_global_quickpulse_state,
36+
_get_and_clear_quickpulse_documents,
37+
_QuickpulseState,
38+
)
39+
from azure.monitor.opentelemetry.exporter._quickpulse._utils import (
40+
_metric_to_quick_pulse_data_points,
4141
)
4242
from azure.monitor.opentelemetry.exporter._connection_string_parser import ConnectionStringParser
4343
from azure.monitor.opentelemetry.exporter._utils import _ticks_since_dot_net_epoch, PeriodicTask
4444

4545

46-
_APPLICATION_INSIGHTS_METRIC_TEMPORALITIES = {
46+
_QUICKPULSE_METRIC_TEMPORALITIES = {
47+
# Use DELTA temporalities because we want to reset the counts every collection interval
4748
Counter: AggregationTemporality.DELTA,
4849
Histogram: AggregationTemporality.DELTA,
49-
ObservableCounter: AggregationTemporality.DELTA,
50-
ObservableGauge: AggregationTemporality.CUMULATIVE,
51-
ObservableUpDownCounter: AggregationTemporality.CUMULATIVE,
52-
UpDownCounter: AggregationTemporality.CUMULATIVE,
5350
}
5451

55-
_SHORT_PING_INTERVAL_SECONDS = 5
56-
_POST_INTERVAL_SECONDS = 1
57-
_LONG_PING_INTERVAL_SECONDS = 60
58-
_POST_CANCEL_INTERVAL_SECONDS = 20
59-
6052

6153
class _Response:
6254
"""Response that encapsulates pipeline response and response headers from
@@ -91,7 +83,7 @@ def __init__(self, connection_string: Optional[str]) -> None:
9183

9284
MetricExporter.__init__(
9385
self,
94-
preferred_temporality=_APPLICATION_INSIGHTS_METRIC_TEMPORALITIES, # type: ignore
86+
preferred_temporality=_QUICKPULSE_METRIC_TEMPORALITIES, # type: ignore
9587
)
9688

9789
def export(
@@ -116,7 +108,7 @@ def export(
116108
data_points = _metric_to_quick_pulse_data_points(
117109
metrics_data,
118110
base_monitoring_data_point=base_monitoring_data_point,
119-
documents=kwargs.get("documents"),
111+
documents=_get_and_clear_quickpulse_documents(),
120112
)
121113

122114
token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
@@ -190,16 +182,6 @@ def _ping(self, monitoring_data_point) -> Optional[_Response]:
190182
return ping_response
191183

192184

193-
class _QuickpulseState(Enum):
194-
"""Current state of quickpulse service.
195-
The numerical value represents the ping/post interval in ms for those states.
196-
"""
197-
198-
PING_SHORT = _SHORT_PING_INTERVAL_SECONDS
199-
PING_LONG = _LONG_PING_INTERVAL_SECONDS
200-
POST_SHORT = _POST_INTERVAL_SECONDS
201-
202-
203185
class _QuickpulseMetricReader(MetricReader):
204186

205187
def __init__(
@@ -208,7 +190,6 @@ def __init__(
208190
base_monitoring_data_point: MonitoringDataPoint,
209191
) -> None:
210192
self._exporter = exporter
211-
self._quick_pulse_state = _QuickpulseState.PING_SHORT
212193
self._base_monitoring_data_point = base_monitoring_data_point
213194
self._elapsed_num_seconds = 0
214195
self._worker = PeriodicTask(
@@ -224,9 +205,9 @@ def __init__(
224205
self._worker.start()
225206

226207
def _ticker(self) -> None:
227-
if self._is_ping_state():
208+
if _is_ping_state():
228209
# Send a ping if elapsed number of request meets the threshold
229-
if self._elapsed_num_seconds % int(self._quick_pulse_state.value) == 0:
210+
if self._elapsed_num_seconds % _get_global_quickpulse_state().value == 0:
230211
print("pinging...")
231212
ping_response = self._exporter._ping( # pylint: disable=protected-access
232213
self._base_monitoring_data_point,
@@ -236,22 +217,22 @@ def _ticker(self) -> None:
236217
if header and header == "true":
237218
print("ping succeeded: switching to post")
238219
# Switch state to post if subscribed
239-
self._quick_pulse_state = _QuickpulseState.POST_SHORT
220+
_set_global_quickpulse_state(_QuickpulseState.POST_SHORT)
240221
self._elapsed_num_seconds = 0
241222
else:
242223
# Backoff after _LONG_PING_INTERVAL_SECONDS (60s) of no successful requests
243-
if self._quick_pulse_state is _QuickpulseState.PING_SHORT and \
224+
if _get_global_quickpulse_state() is _QuickpulseState.PING_SHORT and \
244225
self._elapsed_num_seconds >= _LONG_PING_INTERVAL_SECONDS:
245226
print("ping failed for 60s, switching to pinging every 60s")
246-
self._quick_pulse_state = _QuickpulseState.PING_LONG
227+
_set_global_quickpulse_state(_QuickpulseState.PING_LONG)
247228
# TODO: Implement redirect
248229
else:
249230
# Erroneous ping responses instigate backoff logic
250231
# Backoff after _LONG_PING_INTERVAL_SECONDS (60s) of no successful requests
251-
if self._quick_pulse_state is _QuickpulseState.PING_SHORT and \
232+
if _get_global_quickpulse_state() is _QuickpulseState.PING_SHORT and \
252233
self._elapsed_num_seconds >= _LONG_PING_INTERVAL_SECONDS:
253234
print("ping failed for 60s, switching to pinging every 60s")
254-
self._quick_pulse_state = _QuickpulseState.PING_LONG
235+
_set_global_quickpulse_state(_QuickpulseState.PING_LONG)
255236
else:
256237
print("posting...")
257238
try:
@@ -262,7 +243,7 @@ def _ticker(self) -> None:
262243
# And resume pinging
263244
if self._elapsed_num_seconds >= _POST_CANCEL_INTERVAL_SECONDS:
264245
print("post failed for 20s, switching to pinging")
265-
self._quick_pulse_state = _QuickpulseState.PING_SHORT
246+
_set_global_quickpulse_state(_QuickpulseState.PING_SHORT)
266247
self._elapsed_num_seconds = 0
267248

268249
self._elapsed_num_seconds += 1
@@ -277,7 +258,6 @@ def _receive_metrics(
277258
metrics_data,
278259
timeout_millis=timeout_millis,
279260
base_monitoring_data_point=self._base_monitoring_data_point,
280-
documents=[],
281261
)
282262
if result is MetricExportResult.FAILURE:
283263
# There is currently no way to propagate unsuccessful metric post so
@@ -288,41 +268,3 @@ def _receive_metrics(
288268
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
289269
self._worker.cancel()
290270
self._worker.join()
291-
292-
def _is_ping_state(self):
293-
return self._quick_pulse_state in (_QuickpulseState.PING_SHORT, _QuickpulseState.PING_LONG)
294-
295-
def _metric_to_quick_pulse_data_points( # pylint: disable=too-many-nested-blocks
296-
metrics_data: OTMetricsData,
297-
base_monitoring_data_point: MonitoringDataPoint,
298-
documents: Optional[List[DocumentIngress]],
299-
) -> List[MonitoringDataPoint]:
300-
metric_points = []
301-
for resource_metric in metrics_data.resource_metrics:
302-
for scope_metric in resource_metric.scope_metrics:
303-
for metric in scope_metric.metrics:
304-
for point in metric.data.data_points:
305-
if point is not None:
306-
metric_point = MetricPoint(
307-
name=_QUICKPULSE_METRIC_NAME_MAPPINGS[metric.name.lower()],
308-
weight=1,
309-
)
310-
if isinstance(point, HistogramDataPoint):
311-
metric_point.value = point.sum
312-
elif isinstance(point, NumberDataPoint):
313-
metric_point.value = point.value
314-
else:
315-
metric_point.value = 0
316-
metric_points.append(metric_point)
317-
return [
318-
MonitoringDataPoint(
319-
version=base_monitoring_data_point.version,
320-
instance=base_monitoring_data_point.instance,
321-
role_name=base_monitoring_data_point.role_name,
322-
machine_name=base_monitoring_data_point.machine_name,
323-
stream_id=base_monitoring_data_point.stream_id,
324-
timestamp=datetime.now(tz=timezone.utc),
325-
metrics=metric_points,
326-
documents=documents,
327-
)
328-
]

0 commit comments

Comments
 (0)