Skip to content

Commit 1072fbb

Browse files
authored
Customer Facing Statsbeat: Added remaining drop codes to base (#42382)
* Added remaining drop codes to base * Updated CHANGELOG * Fixed spell check errors * Addressed comments * Fixed spell check error * Changed handling for readonly and exceptions * Fix cspell error * Updated LocalFileStorage and LocalFileBlob * Modified storage tests * Linting * Fixed cspell and lint errors * cspell * Addressed feedback
1 parent b365df7 commit 1072fbb

File tree

7 files changed

+2017
-85
lines changed

7 files changed

+2017
-85
lines changed

sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
## 1.0.0b42 (Unreleased)
44

55
### Features Added
6+
- Customer Facing Statsbeat: Added remaining drop codes to base
7+
([#42382](https://github.com/Azure/azure-sdk-for-python/pull/42382))
68

79
### Breaking Changes
810

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_storage.py

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,18 @@
77
import os
88
import random
99
import subprocess
10+
import errno
11+
from enum import Enum
1012

1113
from azure.monitor.opentelemetry.exporter._utils import PeriodicTask
1214

15+
from azure.monitor.opentelemetry.exporter.statsbeat._state import (
16+
get_local_storage_setup_state_exception,
17+
get_local_storage_setup_state_readonly,
18+
set_local_storage_setup_state_exception,
19+
set_local_storage_setup_state_readonly,
20+
)
21+
1322
logger = logging.getLogger(__name__)
1423

1524
ICACLS_PATH = os.path.join(
@@ -27,6 +36,10 @@ def _now():
2736
def _seconds(seconds):
2837
return datetime.timedelta(seconds=seconds)
2938

39+
class StorageExportResult(Enum):
40+
CLIENT_STORAGE_DISABLED = 1
41+
CLIENT_PERSISTENCE_CAPACITY_REACHED = 2
42+
CLIENT_READONLY = 3
3043

3144
# pylint: disable=broad-except
3245
class LocalFileBlob:
@@ -48,6 +61,8 @@ def get(self):
4861
return None
4962

5063
def put(self, data, lease_period=0):
64+
#TODO: Modify method to remove the return of self as it is not being used anywhere.
65+
# Add typing to method
5166
try:
5267
fullpath = self.fullpath + ".tmp"
5368
with open(fullpath, "w", encoding="utf-8") as file:
@@ -62,9 +77,8 @@ def put(self, data, lease_period=0):
6277
self.fullpath += "@{}.lock".format(_fmt(timestamp))
6378
os.rename(fullpath, self.fullpath)
6479
return self
65-
except Exception:
66-
pass # keep silent
67-
return None
80+
except Exception as ex:
81+
return str(ex)
6882

6983
def lease(self, period):
7084
timestamp = _now() + _seconds(period)
@@ -96,7 +110,6 @@ def __init__(
96110
self._max_size = max_size
97111
self._retention_period = retention_period
98112
self._write_timeout = write_timeout
99-
100113
self._enabled = self._check_and_set_folder_permissions()
101114
if self._enabled:
102115
self._maintenance_routine()
@@ -182,22 +195,34 @@ def get(self):
182195
return None
183196

184197
def put(self, data, lease_period=None):
185-
if not self._enabled:
186-
return None
187-
if not self._check_storage_size():
188-
return None
189-
blob = LocalFileBlob(
190-
os.path.join(
191-
self._path,
192-
"{}-{}.blob".format(
193-
_fmt(_now()),
194-
"{:08x}".format(random.getrandbits(32)), # thread-safe random
195-
),
198+
# TODO: Remove the blob.put result as we are not using it anywhere and use StorageExportResult instead,
199+
# Should still capture exceptions returned from LocalFileBlob.put
200+
# Add typing for method
201+
try:
202+
if not self._enabled:
203+
if get_local_storage_setup_state_readonly():
204+
return StorageExportResult.CLIENT_READONLY
205+
if get_local_storage_setup_state_exception() != "":
206+
# Type conversion has been done to match the return type of this function
207+
return str(get_local_storage_setup_state_exception())
208+
return StorageExportResult.CLIENT_STORAGE_DISABLED
209+
if not self._check_storage_size():
210+
return StorageExportResult.CLIENT_PERSISTENCE_CAPACITY_REACHED
211+
blob = LocalFileBlob(
212+
os.path.join(
213+
self._path,
214+
"{}-{}.blob".format(
215+
_fmt(_now()),
216+
"{:08x}".format(random.getrandbits(32)), # thread-safe random
217+
),
218+
)
196219
)
197-
)
198-
if lease_period is None:
199-
lease_period = self._lease_period
200-
return blob.put(data, lease_period=lease_period)
220+
if lease_period is None:
221+
lease_period = self._lease_period
222+
return blob.put(data, lease_period=lease_period)
223+
except Exception as ex:
224+
return str(ex)
225+
201226

202227
def _check_and_set_folder_permissions(self):
203228
"""
@@ -235,8 +260,13 @@ def _check_and_set_folder_permissions(self):
235260
else:
236261
os.chmod(self._path, 0o700)
237262
return True
238-
except Exception:
239-
pass # keep silent
263+
except OSError as error:
264+
if getattr(error, 'errno', None) == errno.EROFS: # cspell:disable-line
265+
set_local_storage_setup_state_readonly()
266+
else:
267+
set_local_storage_setup_state_exception(str(error))
268+
except Exception as ex:
269+
set_local_storage_setup_state_exception(str(ex))
240270
return False
241271

242272
def _check_storage_size(self):

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/_base.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,18 @@
4646
)
4747
# from azure.monitor.opentelemetry.exporter._configuration import _ConfigurationManager
4848
from azure.monitor.opentelemetry.exporter._connection_string_parser import ConnectionStringParser
49-
from azure.monitor.opentelemetry.exporter._storage import LocalFileStorage
49+
from azure.monitor.opentelemetry.exporter._storage import (
50+
LocalFileStorage,
51+
StorageExportResult,
52+
)
5053
from azure.monitor.opentelemetry.exporter._utils import _get_auth_policy
5154
from azure.monitor.opentelemetry.exporter.statsbeat._state import (
5255
get_statsbeat_initial_success,
5356
get_statsbeat_shutdown,
5457
increment_and_check_statsbeat_failure_count,
5558
is_statsbeat_enabled,
5659
set_statsbeat_initial_success,
60+
get_local_storage_setup_state_exception,
5761
)
5862
from azure.monitor.opentelemetry.exporter.statsbeat._utils import (
5963
_update_requests_map,
@@ -196,14 +200,35 @@ def _transmit_from_storage(self) -> None:
196200
else:
197201
blob.delete()
198202

203+
199204
def _handle_transmit_from_storage(self, envelopes: List[TelemetryItem], result: ExportResult) -> None:
200205
if self.storage:
201206
if result == ExportResult.FAILED_RETRYABLE:
202207
envelopes_to_store = [x.as_dict() for x in envelopes]
203-
self.storage.put(envelopes_to_store)
208+
result_from_storage_put = self.storage.put(envelopes_to_store)
209+
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
210+
if result_from_storage_put == StorageExportResult.CLIENT_STORAGE_DISABLED:
211+
# Track items that would have been retried but are dropped since client has local storage disabled
212+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_STORAGE_DISABLED)
213+
elif result_from_storage_put == StorageExportResult.CLIENT_READONLY:
214+
# If filesystem is readonly, track dropped items in customer statsbeat
215+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_READONLY)
216+
elif result_from_storage_put == StorageExportResult.CLIENT_PERSISTENCE_CAPACITY_REACHED:
217+
# If data has to be dropped due to persistent storage being full, track dropped items
218+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_PERSISTENCE_CAPACITY)
219+
elif get_local_storage_setup_state_exception() != "":
220+
# For exceptions caught in _check_and_set_folder_permissions during storage setup
221+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_EXCEPTION, result_from_storage_put)
222+
elif isinstance(result_from_storage_put, str):
223+
# For any exception raised in the put method of either LocalFileStorage or LocalFileBlob, track the dropped items with the exception message as the reason
224+
_track_dropped_items(self._customer_statsbeat_metrics, envelopes, DropCode.CLIENT_EXCEPTION, result_from_storage_put)
225+
else:
226+
# On success LocalFileBlob.put returns the blob object itself; on failure it returns the exception message as a string (handled above). The success value is not used here and is slated for removal.
227+
pass
204228
elif result == ExportResult.SUCCESS:
205229
# Try to send any cached events
206230
self._transmit_from_storage()
231+
207232
else:
208233
# Track items that would have been retried but are dropped since client has local storage disabled
209234
if self._customer_statsbeat_metrics and self._should_collect_customer_statsbeat():
@@ -453,7 +478,7 @@ def _should_collect_stats(self):
453478
# Check whether customer statsbeat collection applies
454479
def _should_collect_customer_statsbeat(self):
455480
# Import here to avoid circular dependencies
456-
from azure.monitor.opentelemetry.exporter.statsbeat._customer_statsbeat import get_customer_statsbeat_shutdown
481+
from azure.monitor.opentelemetry.exporter.statsbeat._state import get_customer_statsbeat_shutdown
457482

458483
env_value = os.environ.get("APPLICATIONINSIGHTS_STATSBEAT_ENABLED_PREVIEW", "")
459484
is_customer_statsbeat_enabled = env_value.lower() == "true"

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/statsbeat/_customer_statsbeat.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@
3737
)
3838
from azure.monitor.opentelemetry.exporter import VERSION
3939

40+
from azure.monitor.opentelemetry.exporter.statsbeat._state import (
41+
_CUSTOMER_STATSBEAT_STATE,
42+
_CUSTOMER_STATSBEAT_STATE_LOCK,
43+
)
44+
4045
_CUSTOMER_STATSBEAT_MAP_LOCK = threading.Lock()
4146

4247
class _CustomerStatsbeatTelemetryCounters:
@@ -256,10 +261,6 @@ def collect_customer_statsbeat(exporter):
256261
if hasattr(exporter, 'storage') and exporter.storage:
257262
exporter.storage._customer_statsbeat_metrics = _CUSTOMER_STATSBEAT_METRICS
258263

259-
_CUSTOMER_STATSBEAT_STATE = {
260-
"SHUTDOWN": False,
261-
}
262-
_CUSTOMER_STATSBEAT_STATE_LOCK = threading.Lock()
263264

264265
def shutdown_customer_statsbeat_metrics() -> None:
265266
global _CUSTOMER_STATSBEAT_METRICS
@@ -276,6 +277,3 @@ def shutdown_customer_statsbeat_metrics() -> None:
276277
if shutdown_success:
277278
with _CUSTOMER_STATSBEAT_STATE_LOCK:
278279
_CUSTOMER_STATSBEAT_STATE["SHUTDOWN"] = True
279-
280-
def get_customer_statsbeat_shutdown():
281-
return _CUSTOMER_STATSBEAT_STATE["SHUTDOWN"]

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/statsbeat/_state.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@
1919
_STATSBEAT_STATE_LOCK = threading.Lock()
2020
_STATSBEAT_FAILURE_COUNT_THRESHOLD = 3
2121

22+
_CUSTOMER_STATSBEAT_STATE = {
23+
"SHUTDOWN": False,
24+
}
25+
_CUSTOMER_STATSBEAT_STATE_LOCK = threading.Lock()
26+
27+
_LOCAL_STORAGE_SETUP_STATE = {
28+
"READONLY": False,
29+
"EXCEPTION_OCCURRED": ""
30+
}
31+
32+
_LOCAL_STORAGE_SETUP_STATE_LOCK = threading.Lock()
2233

2334
def is_statsbeat_enabled():
2435
disabled = os.environ.get(_APPLICATIONINSIGHTS_STATSBEAT_DISABLED_ALL)
@@ -68,3 +79,20 @@ def get_statsbeat_live_metrics_feature_set():
6879
def set_statsbeat_live_metrics_feature_set():
6980
with _STATSBEAT_STATE_LOCK:
7081
_STATSBEAT_STATE["LIVE_METRICS_FEATURE_SET"] = True
82+
83+
def get_customer_statsbeat_shutdown():
84+
return _CUSTOMER_STATSBEAT_STATE["SHUTDOWN"]
85+
86+
def get_local_storage_setup_state_readonly():
87+
return _LOCAL_STORAGE_SETUP_STATE["READONLY"]
88+
89+
def set_local_storage_setup_state_readonly():
90+
with _LOCAL_STORAGE_SETUP_STATE_LOCK:
91+
_LOCAL_STORAGE_SETUP_STATE["READONLY"] = True
92+
93+
def get_local_storage_setup_state_exception():
94+
return _LOCAL_STORAGE_SETUP_STATE["EXCEPTION_OCCURRED"]
95+
96+
def set_local_storage_setup_state_exception(value):
97+
with _LOCAL_STORAGE_SETUP_STATE_LOCK:
98+
_LOCAL_STORAGE_SETUP_STATE["EXCEPTION_OCCURRED"] = value

0 commit comments

Comments
 (0)