Skip to content

Commit 2b12567

Browse files
rads-1996Copilot
andauthored
Customer Facing Statsbeat: Updated base and storage to include the metrices for customer statsbeat (#42123)
* Updated base and storage to include the metrices for customer statsbeat * Updated readonly dropped items and CHANGELOG * Update sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/statsbeat/_customer_statsbeat.py Co-authored-by: Copilot <[email protected]> * Fixed linting errors * Updated base and exporter with retry and dropped items tracking * Resolved spell check errors * Resolved cspell errors * Removed changes from storage * Updated CHANGELOG * Added ending line to test_storage * Addressed feedback and modifed track_dropped_items --------- Co-authored-by: Copilot <[email protected]>
1 parent ef8496d commit 2b12567

File tree

7 files changed

+2305
-37
lines changed

7 files changed

+2305
-37
lines changed

sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
([#41971](https://github.com/Azure/azure-sdk-for-python/pull/41971))
3939
- Support AI Foundry by Handling GEN_AI_SYSTEM Attributes with [Spec](https://github.com/aep-health-and-standards/Telemetry-Collection-Spec/blob/main/ApplicationInsights/genai_semconv_mapping.md) ([#41705](https://github.com/Azure/azure-sdk-for-python/pull/41705))
4040
- Distinguish Azure AI SDKs in statsbeats ([#42016](https://github.com/Azure/azure-sdk-for-python/pull/42016))
41+
- Customer Facing Statsbeat: Updated base to include the metrics for customer statsbeat
42+
([#42123](https://github.com/Azure/azure-sdk-for-python/pull/42123))
43+
4144

4245
## 1.0.0b39 (2025-06-25)
4346

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@
144144
class DropCode(str, Enum, metaclass=CaseInsensitiveEnumMeta):
145145
CLIENT_READONLY = "CLIENT_READONLY"
146146
CLIENT_EXCEPTION = "CLIENT_EXCEPTION"
147-
CLIENT_STALE_DATA = "CLIENT_STALE_DATA"
148147
CLIENT_PERSISTENCE_CAPACITY = "CLIENT_PERSISTENCE_CAPACITY"
148+
CLIENT_STORAGE_DISABLED = "CLIENT_STORAGE_DISABLED"
149149
UNKNOWN = "UNKNOWN"
150150

151151
DropCodeType = Union[DropCode, int]

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/_base.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
_REQ_THROTTLE_NAME,
4343
_RETRYABLE_STATUS_CODES,
4444
_THROTTLE_STATUS_CODES,
45+
DropCode,
4546
)
4647
from azure.monitor.opentelemetry.exporter._connection_string_parser import ConnectionStringParser
4748
from azure.monitor.opentelemetry.exporter._storage import LocalFileStorage
@@ -53,7 +54,13 @@
5354
is_statsbeat_enabled,
5455
set_statsbeat_initial_success,
5556
)
56-
from azure.monitor.opentelemetry.exporter.statsbeat._utils import _update_requests_map
57+
from azure.monitor.opentelemetry.exporter.statsbeat._utils import (
58+
_update_requests_map,
59+
_track_dropped_items,
60+
_track_retry_items,
61+
_track_successful_items,
62+
)
63+
5764

5865
logger = logging.getLogger(__name__)
5966

@@ -162,6 +169,14 @@ def __init__(self, **kwargs: Any) -> None:
162169

163170
collect_statsbeat_metrics(self)
164171

172+
# Initialize customer statsbeat if enabled
173+
self._customer_statsbeat_metrics = None
174+
if self._should_collect_customer_statsbeat():
175+
176+
from azure.monitor.opentelemetry.exporter.statsbeat._customer_statsbeat import collect_customer_statsbeat
177+
# Collect customer statsbeat metrics
178+
collect_customer_statsbeat(self)
179+
165180
def _transmit_from_storage(self) -> None:
166181
if not self.storage:
167182
return
@@ -184,6 +199,10 @@ def _handle_transmit_from_storage(self, envelopes: List[TelemetryItem], result:
184199
elif result == ExportResult.SUCCESS:
185200
# Try to send any cached events
186201
self._transmit_from_storage()
202+
else:
203+
# Track items that would have been retried but are dropped since client has local storage disabled
204+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
205+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_STORAGE_DISABLED)
187206

188207
# pylint: disable=too-many-branches
189208
# pylint: disable=too-many-nested-blocks
@@ -219,14 +238,25 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
219238
_update_requests_map(_REQ_SUCCESS_NAME[1], 1)
220239
reach_ingestion = True
221240
result = ExportResult.SUCCESS
241+
242+
# Track successful items in customer statsbeat
243+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
244+
_track_successful_items(self._customer_statsbeat_metrics, envelopes)
222245
else: # 206
223246
reach_ingestion = True
224247
resend_envelopes = []
225248
for error in track_response.errors:
226249
if _is_retryable_code(error.status_code):
227250
resend_envelopes.append(envelopes[error.index]) # type: ignore
251+
# Track retried items in customer statsbeat
252+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
253+
_track_retry_items(self._customer_statsbeat_metrics, resend_envelopes, error)
228254
else:
229255
if not self._is_stats_exporter():
256+
# Track dropped items in customer statsbeat, non-retryable scenario
257+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
258+
if error is not None and hasattr(error, "index") and error.index is not None:
259+
_track_dropped_items(self._customer_statsbeat_metrics, [envelopes[error.index]], error.status_code)
230260
logger.error(
231261
"Data drop %s: %s %s.",
232262
error.status_code,
@@ -237,6 +267,10 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
237267
envelopes_to_store = [x.as_dict() for x in resend_envelopes]
238268
self.storage.put(envelopes_to_store, 0)
239269
self._consecutive_redirects = 0
270+
elif resend_envelopes:
271+
# Track items that would have been retried but are dropped since client has local storage disabled
272+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
273+
_track_dropped_items(self._customer_statsbeat_metrics, resend_envelopes, DropCode.CLIENT_STORAGE_DISABLED)
240274
# Mark as not retryable because we already write to storage here
241275
result = ExportResult.FAILED_NOT_RETRYABLE
242276
except HttpResponseError as response_error:
@@ -249,6 +283,8 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
249283
result = ExportResult.FAILED_RETRYABLE
250284
# Log error for 401: Unauthorized, 403: Forbidden to assist with customer troubleshooting
251285
if not self._is_stats_exporter():
286+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
287+
_track_retry_items(self._customer_statsbeat_metrics, envelopes, response_error)
252288
if response_error.status_code == 401:
253289
logger.error(
254290
"Retryable server side error: %s. " \
@@ -257,6 +293,8 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
257293
response_error.message,
258294
)
259295
elif response_error.status_code == 403:
296+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
297+
_track_retry_items(self._customer_statsbeat_metrics, envelopes, response_error)
260298
logger.error(
261299
"Retryable server side error: %s. " \
262300
"Your application may be configured with a token credential " \
@@ -269,6 +307,10 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
269307
if self._should_collect_stats():
270308
_update_requests_map(_REQ_THROTTLE_NAME[1], value=response_error.status_code)
271309
result = ExportResult.FAILED_NOT_RETRYABLE
310+
311+
if not self._is_stats_exporter():
312+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
313+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, response_error.status_code)
272314
elif _is_redirect_code(response_error.status_code):
273315
self._consecutive_redirects = self._consecutive_redirects + 1
274316
# pylint: disable=W0212
@@ -286,12 +328,22 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
286328
result = self._transmit(envelopes)
287329
else:
288330
if not self._is_stats_exporter():
331+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
332+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_EXCEPTION, "Error parsing redirect information.")
289333
logger.error(
290334
"Error parsing redirect information.",
291335
)
292336
result = ExportResult.FAILED_NOT_RETRYABLE
293337
else:
294338
if not self._is_stats_exporter():
339+
# Track dropped items in customer statsbeat, non-retryable scenario
340+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
341+
_track_dropped_items(
342+
self._customer_statsbeat_metrics,
343+
envelopes,
344+
DropCode.CLIENT_EXCEPTION,
345+
"Error sending telemetry because of circular redirects. Please check the integrity of your connection string."
346+
)
295347
logger.error(
296348
"Error sending telemetry because of circular redirects. "
297349
"Please check the integrity of your connection string."
@@ -311,20 +363,33 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
311363
"Non-retryable server side error: %s.",
312364
response_error.message,
313365
)
366+
# Track dropped items in customer statsbeat, non-retryable scenario
367+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
368+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, response_error.status_code)
314369
if _is_invalid_code(response_error.status_code):
315370
# Shutdown statsbeat on invalid code from customer endpoint
316371
# Import here to avoid circular dependencies
317372
from azure.monitor.opentelemetry.exporter.statsbeat._statsbeat import (
318373
shutdown_statsbeat_metrics,
319374
)
375+
from azure.monitor.opentelemetry.exporter.statsbeat._customer_statsbeat import (
376+
shutdown_customer_statsbeat_metrics,
377+
)
320378

321379
shutdown_statsbeat_metrics()
380+
# Also shutdown customer statsbeat on invalid code
381+
shutdown_customer_statsbeat_metrics()
322382
result = ExportResult.FAILED_NOT_RETRYABLE
323383
except ServiceRequestError as request_error:
324384
# Errors when we're fairly sure that the server did not receive the
325385
# request, so it should be safe to retry.
326386
# ServiceRequestError is raised by azure.core for these cases
327387
logger.warning("Retrying due to server request error: %s.", request_error.message)
388+
389+
# Track retry items in customer statsbeat for client-side exceptions
390+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
391+
_track_retry_items(self._customer_statsbeat_metrics, envelopes, request_error)
392+
328393
if self._should_collect_stats():
329394
exc_type = request_error.exc_type
330395
if exc_type is None or exc_type is type(None):
@@ -333,6 +398,11 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
333398
result = ExportResult.FAILED_RETRYABLE
334399
except Exception as ex:
335400
logger.exception("Envelopes could not be exported and are not retryable: %s.", ex) # pylint: disable=C4769
401+
402+
# Track dropped items in customer statsbeat for general exceptions
403+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
404+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_EXCEPTION, str(ex))
405+
336406
if self._should_collect_stats():
337407
_update_requests_map(_REQ_EXCEPTION_NAME[1], value=ex.__class__.__name__)
338408
result = ExportResult.FAILED_NOT_RETRYABLE
@@ -355,6 +425,7 @@ def _transmit(self, envelopes: List[TelemetryItem]) -> ExportResult:
355425
)
356426

357427
shutdown_statsbeat_metrics()
428+
358429
# pylint: disable=lost-exception
359430
return ExportResult.FAILED_NOT_RETRYABLE # pylint: disable=W0134
360431
# pylint: disable=lost-exception
@@ -373,13 +444,31 @@ def _should_collect_stats(self):
373444
and not self._instrumentation_collection
374445
)
375446

447+
448+
# check to see whether its the case of customer stats collection
449+
def _should_collect_customer_statsbeat(self):
450+
# Import here to avoid circular dependencies
451+
from azure.monitor.opentelemetry.exporter.statsbeat._customer_statsbeat import get_customer_statsbeat_shutdown
452+
453+
env_value = os.environ.get("APPLICATIONINSIGHTS_STATSBEAT_ENABLED_PREVIEW", "")
454+
is_customer_statsbeat_enabled = env_value.lower() == "true"
455+
# Don't collect customer statsbeat for instrumentation collection or customer statsbeat exporters
456+
return (
457+
is_customer_statsbeat_enabled
458+
and not get_customer_statsbeat_shutdown()
459+
and not self._is_customer_stats_exporter()
460+
and not self._instrumentation_collection
461+
)
462+
376463
# check to see if statsbeat is in "attempting to be initialized" state
377464
def _is_statsbeat_initializing_state(self):
378465
return self._is_stats_exporter() and not get_statsbeat_shutdown() and not get_statsbeat_initial_success()
379466

380467
def _is_stats_exporter(self):
381468
return self.__class__.__name__ == "_StatsBeatExporter"
382469

470+
def _is_customer_stats_exporter(self):
471+
return getattr(self, '_is_customer_statsbeat', False)
383472

384473
def _is_invalid_code(response_code: Optional[int]) -> bool:
385474
"""Determine if response is a invalid response.

0 commit comments

Comments
 (0)