Skip to content

Commit 859ab96

Browse files
committed
telemetry/module.py: update gather_perf_counters to emit values of labeled counters
Until now, gather_perf_counters only included the values of unlabeled counters. We update the API to include the values of labeled counters. This change also updates the format in which these values are emitted. The perf counters in the telemetry report are now shown as below: ``` "perf_counters": { "mon.239f8ba73d60451130f972fcd40d9b409b1bfb66": { "AsyncMessenger::Worker": [ { "counters": { "msgr_connection_idle_timeouts": { "value": 0 }, "msgr_connection_ready_timeouts": { "value": 0 } }, "labels": { "id": "0" } } ], ``` Notice that each collection is now a list of entries, and each entry has two new subfields: 'counters' and 'labels'. Signed-off-by: Naveen Naidu <[email protected]>
1 parent 21d3591 commit 859ab96

File tree

1 file changed

+73
-58
lines changed

1 file changed

+73
-58
lines changed

src/pybind/mgr/telemetry/module.py

Lines changed: 73 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -808,27 +808,36 @@ def gather_crashinfo(self) -> List[Dict[str, str]]:
808808
return crashlist
809809

810810
def gather_perf_counters(self, mode: str = 'separated') -> Dict[str, dict]:
811-
# Extract perf counter data with get_unlabeled_perf_counters(), a method
812-
# from mgr/mgr_module.py. This method returns a nested dictionary that
813-
# looks a lot like perf schema, except with some additional fields.
814-
#
815-
# Example of output, a snapshot of a mon daemon:
816-
# "mon.b": {
817-
# "bluestore.kv_flush_lat": {
818-
# "count": 2431,
819-
# "description": "Average kv_thread flush latency",
820-
# "nick": "fl_l",
821-
# "priority": 8,
822-
# "type": 5,
823-
# "units": 1,
824-
# "value": 88814109
825-
# },
826-
# },
827-
perf_counters = self.get_unlabeled_perf_counters()
811+
"""
812+
Extract perf counter data with get_perf_counters(), a method from
813+
mgr/mgr_module.py. This method returns a nested dictionary that looks a
814+
lot like perf schema, except with some additional fields.
815+
816+
Example of output, a snapshot of a mon daemon:
817+
"mon.b":{
818+
"bluestore": [
819+
{
820+
"labels": {},
821+
"counters": {
822+
"kv_flush_lat": {
823+
"description": "bluestore.kv_flush_lat",
824+
"nick": "kfsl",
825+
"type": 5,
826+
"priority": 8,
827+
"units": 1,
828+
"value": 14814406948,
829+
"count": 141
830+
},
831+
}
832+
},
833+
]
834+
}
835+
836+
"""
837+
perf_counters = self.get_perf_counters()
828838

829839
# Initialize 'result' dict
830-
result: Dict[str, dict] = defaultdict(lambda: defaultdict(
831-
lambda: defaultdict(lambda: defaultdict(int))))
840+
result: Dict[str, dict] = defaultdict(lambda: defaultdict(list))
832841

833842
# 'separated' mode
834843
anonymized_daemon_dict = {}
@@ -850,11 +859,7 @@ def gather_perf_counters(self, mode: str = 'separated') -> Dict[str, dict]:
850859
else:
851860
result[daemon_type]['num_combined_daemons'] += 1
852861

853-
for collection in perf_counters_by_daemon:
854-
# Split the collection to avoid redundancy in final report; i.e.:
855-
# bluestore.kv_flush_lat, bluestore.kv_final_lat -->
856-
# bluestore: kv_flush_lat, kv_final_lat
857-
col_0, col_1 = collection.split('.')
862+
for collection, sub_collection_list in perf_counters_by_daemon.items():
858863

859864
# Debug log for empty keys. This initially was a problem for prioritycache
860865
# perf counters, where the col_0 was empty for certain mon counters:
@@ -864,42 +869,52 @@ def gather_perf_counters(self, mode: str = 'separated') -> Dict[str, dict]:
864869
# "cache_bytes": {...}, "cache_bytes": {...},
865870
#
866871
# This log is here to detect any future instances of a similar issue.
867-
if (daemon == "") or (col_0 == "") or (col_1 == ""):
872+
if (daemon == "") or (collection == ""):
868873
self.log.debug("Instance of an empty key: {}{}".format(daemon, collection))
874+
continue
869875

870-
if mode == 'separated':
871-
# Add value to result
872-
result[daemon][col_0][col_1]['value'] = \
873-
perf_counters_by_daemon[collection]['value']
874-
875-
# Check that 'count' exists, as not all counters have a count field.
876-
if 'count' in perf_counters_by_daemon[collection]:
877-
result[daemon][col_0][col_1]['count'] = \
878-
perf_counters_by_daemon[collection]['count']
879-
elif mode == 'aggregated':
880-
# Not every rgw daemon has the same schema. Specifically, each rgw daemon
881-
# has a uniquely-named collection that starts off identically (i.e.
882-
# "objecter-0x...") then diverges (i.e. "...55f4e778e140.op_rmw").
883-
# This bit of code combines these unique counters all under one rgw instance.
884-
# Without this check, the schema would remain separeted out in the final report.
885-
if col_0[0:11] == "objecter-0x":
886-
col_0 = "objecter-0x"
887-
888-
# Check that the value can be incremented. In some cases,
889-
# the files are of type 'pair' (real-integer-pair, integer-integer pair).
890-
# In those cases, the value is a dictionary, and not a number.
891-
# i.e. throttle-msgr_dispatch_throttler-hbserver["wait"]
892-
if isinstance(perf_counters_by_daemon[collection]['value'], numbers.Number):
893-
result[daemon_type][col_0][col_1]['value'] += \
894-
perf_counters_by_daemon[collection]['value']
895-
896-
# Check that 'count' exists, as not all counters have a count field.
897-
if 'count' in perf_counters_by_daemon[collection]:
898-
result[daemon_type][col_0][col_1]['count'] += \
899-
perf_counters_by_daemon[collection]['count']
900-
else:
901-
self.log.error('Incorrect mode specified in gather_perf_counters: {}'.format(mode))
902-
return {}
876+
result[daemon][collection] = []
877+
878+
for sub_collection in sub_collection_list:
879+
sub_collection_result: Dict[str, dict] = defaultdict(lambda: defaultdict(dict))
880+
sub_collection_result['labels'] = sub_collection['labels']
881+
for sub_collection_counter_name, sub_collection_counter_info in sub_collection['counters'].items():
882+
if mode == 'separated':
883+
# Add value to result
884+
sub_collection_result['counters'][sub_collection_counter_name]['value'] = \
885+
sub_collection_counter_info['value']
886+
887+
# Check that 'count' exists, as not all counters have a count field.
888+
if 'count' in sub_collection_counter_info:
889+
sub_collection_result['counters'][sub_collection_counter_name]['count'] = \
890+
sub_collection_counter_info['count']
891+
elif mode == 'aggregated':
892+
self.log.debug("telemetry in mode: agregated")
893+
# Not every rgw daemon has the same schema. Specifically, each rgw daemon
894+
# has a uniquely-named collection that starts off identically (i.e.
895+
# "objecter-0x...") then diverges (i.e. "...55f4e778e140.op_rmw").
896+
# This bit of code combines these unique counters all under one rgw instance.
897+
# Without this check, the schema would remain separeted out in the final report.
898+
if collection[0:11] == "objecter-0x":
899+
collection = "objecter-0x"
900+
901+
# Check that the value can be incremented. In some cases,
902+
# the files are of type 'pair' (real-integer-pair, integer-integer pair).
903+
# In those cases, the value is a dictionary, and not a number.
904+
# i.e. throttle-msgr_dispatch_throttler-hbserver["wait"]
905+
if isinstance(sub_collection_counter_info['value'], numbers.Number):
906+
sub_collection_result['counters'][sub_collection_counter_name]['value'] += \
907+
sub_collection_counter_info['value']
908+
909+
# Check that 'count' exists, as not all counters have a count field.
910+
if 'count' in sub_collection_counter_info:
911+
sub_collection_result['counters'][sub_collection_counter_name]['count'] += \
912+
sub_collection_counter_info['count']
913+
else:
914+
self.log.error('Incorrect mode specified in gather_perf_counters: {}'.format(mode))
915+
return {}
916+
917+
result[daemon][collection].append(sub_collection_result)
903918

904919
if mode == 'separated':
905920
# for debugging purposes only, this data is never reported

0 commit comments

Comments
 (0)