Skip to content

Commit 77211bd

Browse files
MarekPieta authored and kapi-no committed
scripts: nrf_profiler: Improve stats calculating script
Clean up the script implementation. Allow defining presets using a *.json file. Allow filtering outliers to improve UX for the histogram plot. Jira: NCSDK-31712 Signed-off-by: Marek Pieta <[email protected]>
1 parent 40b35ca commit 77211bd

File tree

2 files changed

+173
-153
lines changed

2 files changed

+173
-153
lines changed

scripts/nrf_profiler/calc_stats.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,40 @@
77

88
import argparse
99
import logging
10+
import json
1011

1112

1213
def main():
13-
parser = argparse.ArgumentParser(description='Calculating stats for events.',
14+
parser = argparse.ArgumentParser(description="nRF Profiler event propagation statistics",
1415
allow_abbrev=False)
15-
parser.add_argument('dataset_name', help='Name of dataset')
16-
parser.add_argument('--start_time', help='Measurement start time[s]')
17-
parser.add_argument('--end_time', help='Measurement end time[s]')
18-
parser.add_argument('--log', help='Log level')
16+
parser.add_argument("dataset_name", help="Name of nRF Profiler dataset")
17+
parser.add_argument("test_presets", help="Test preset [*.json file]")
18+
parser.add_argument("--start_time", type=float, default=0.0,
19+
help="Measurement start time [s]")
20+
parser.add_argument("--end_time", type=float, default=float('inf'),
21+
help="Measurement end time [s]")
22+
parser.add_argument("--log", help="Log level")
1923
args = parser.parse_args()
2024

2125
if args.log is not None:
2226
log_lvl_number = int(getattr(logging, args.log.upper(), None))
2327
else:
2428
log_lvl_number = logging.INFO
2529

26-
if args.start_time is None:
27-
args.start_time = 0
28-
else:
29-
args.start_time = float(args.start_time)
30-
31-
if args.end_time is None:
32-
args.end_time = float('inf')
33-
else:
34-
args.end_time = float(args.end_time)
35-
36-
sn = StatsNordic(args.dataset_name + ".csv", args.dataset_name + ".json",
30+
try:
31+
with open(args.test_presets, "r") as test_preset_file:
32+
test_preset_dict = json.load(test_preset_file)
33+
except FileNotFoundError:
34+
print("File {} not found".format(args.test_presets))
35+
return
36+
except Exception as e:
37+
print("Exception while opening {}".format(args.test_presets))
38+
print(e)
39+
return
40+
41+
sn = StatsNordic("{}.csv".format(args.dataset_name), "{}.json".format(args.dataset_name),
3742
log_lvl_number)
38-
sn.calculate_stats_preset1(args.start_time, args.end_time)
43+
sn.calculate_stats(test_preset_dict, args.start_time, args.end_time)
3944

4045
if __name__ == "__main__":
4146
main()

scripts/nrf_profiler/stats_nordic.py

Lines changed: 151 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -4,184 +4,199 @@
44
# SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
55

66
from processed_events import ProcessedEvents
7-
from enum import Enum
7+
from enum import StrEnum
88
import matplotlib.pyplot as plt
99
import numpy as np
1010
import logging
1111
import os
1212

1313

14-
OUTPUT_FOLDER = "data_stats/"
15-
16-
17-
class EventState(Enum):
18-
SUBMIT = 1
19-
PROC_START = 2
20-
PROC_END = 3
14+
OUTPUT_DIR_BASE = "data_stats"
2115

16+
class EventState(StrEnum):
17+
SUBMIT = "submit"
18+
PROCESSING_START = "processing_start"
19+
PROCESSING_END = "processing_end"
2220

2321
class StatsNordic():
2422
def __init__(self, events_filename, events_types_filename, log_lvl):
25-
self.data_name = events_filename.split('.')[0]
23+
self.dataset_name = events_filename.split(".")[0]
2624
self.processed_data = ProcessedEvents()
2725
self.processed_data.read_data_from_files(events_filename, events_types_filename)
2826

29-
self.logger = logging.getLogger('Stats Nordic')
27+
self.logger = logging.getLogger("Stats Nordic")
3028
self.logger_console = logging.StreamHandler()
3129
self.logger.setLevel(log_lvl)
32-
self.log_format = logging.Formatter(
33-
'[%(levelname)s] %(name)s: %(message)s')
30+
self.log_format = logging.Formatter("[%(levelname)s] %(name)s: %(message)s")
3431
self.logger_console.setFormatter(self.log_format)
3532
self.logger.addHandler(self.logger_console)
3633

37-
def calculate_stats_preset1(self, start_meas, end_meas):
38-
self.time_between_events("hid_mouse_event_dongle", EventState.SUBMIT,
39-
"hid_report_sent_event_device", EventState.SUBMIT,
40-
0.05, start_meas, end_meas)
41-
self.time_between_events("hid_mouse_event_dongle", EventState.SUBMIT,
42-
"hid_report_sent_event_device", EventState.SUBMIT,
43-
0.05, start_meas, end_meas)
44-
self.time_between_events("hid_report_sent_event_dongle", EventState.SUBMIT,
45-
"hid_report_sent_event_dongle", EventState.SUBMIT,
46-
0.05, start_meas, end_meas)
47-
self.time_between_events("hid_mouse_event_dongle", EventState.SUBMIT,
48-
"hid_report_sent_event_dongle", EventState.SUBMIT,
49-
0.05, start_meas, end_meas)
50-
self.time_between_events("hid_mouse_event_device", EventState.SUBMIT,
51-
"hid_mouse_event_dongle", EventState.SUBMIT,
52-
0.05, start_meas, end_meas)
53-
plt.show()
54-
55-
def _get_timestamps(self, event_name, event_state, start_meas, end_meas):
34+
def _get_timestamps(self, event_name, event_state, start_meas=0.0, end_meas=float("inf")):
5635
event_type_id = self.processed_data.get_event_type_id(event_name)
5736
if event_type_id is None:
58-
self.logger.error("Event name not found: " + event_name)
59-
return None
60-
if not self.processed_data.is_event_tracked(event_type_id) and event_state != EventState.SUBMIT:
61-
self.logger.error("This event is not tracked: " + event_name)
62-
return None
63-
64-
trackings = list(filter(lambda x:
65-
x.submit.type_id == event_type_id,
66-
self.processed_data.tracked_events))
37+
raise ValueError("Event not found in dataset: {}".format(event_name))
6738

6839
if not isinstance(event_state, EventState):
69-
self.logger.error("Event state should be EventState enum")
70-
return None
40+
raise ValueError("Invalid EventState: {}".format(event_state))
41+
42+
if event_state != EventState.SUBMIT:
43+
if not self.processed_data.is_event_tracked(event_type_id):
44+
raise ValueError("Event not tracked: {}".format(event_name))
45+
46+
trackings = list(filter(lambda x: x.submit.type_id == event_type_id,
47+
self.processed_data.tracked_events))
7148

7249
if event_state == EventState.SUBMIT:
7350
timestamps = np.fromiter(map(lambda x: x.submit.timestamp, trackings),
7451
dtype=float)
75-
elif event_state == EventState.PROC_START:
52+
elif event_state == EventState.PROCESSING_START:
7653
timestamps = np.fromiter(map(lambda x: x.proc_start_time, trackings),
7754
dtype=float)
78-
elif event_state == EventState.PROC_END:
55+
elif event_state == EventState.PROCESSING_END:
7956
timestamps = np.fromiter(map(lambda x: x.proc_end_time, trackings),
8057
dtype=float)
8158

82-
timestamps = timestamps[np.where((timestamps > start_meas)
83-
& (timestamps < end_meas))]
59+
timestamps = timestamps[np.nonzero((timestamps > start_meas) & (timestamps < end_meas))]
8460

8561
return timestamps
8662

87-
@staticmethod
88-
def calculate_times_between(start_times, end_times):
89-
if end_times[0] <= start_times[0]:
63+
def _calculate_times_between_ms(self, start_times, end_times):
64+
# Number of start times must match the number of end times.
65+
# Remove start/end time records to also ensure that start times are before end times.
66+
while end_times[0] <= start_times[0]:
67+
self.logger.warning("End time[0] <= start time[0], dropping the first end time")
9068
end_times = end_times[1:]
91-
if len(start_times) > len(end_times):
69+
70+
while len(start_times) > len(end_times):
71+
self.logger.warning("Start/end times length mismatch, dropping the last start time")
9272
start_times = start_times[:-1]
9373

94-
return (end_times - start_times) * 1000
74+
# Convert results to milliseconds
75+
times_between = (end_times - start_times) * 1000
76+
return times_between, start_times, end_times
77+
78+
@staticmethod
79+
def _get_outlier_filter_mask(measurements, std_factor=3):
80+
mean = np.mean(measurements)
81+
std = np.std(measurements)
82+
limit_min = mean - std_factor * std
83+
limit_max = mean + std_factor * std
84+
85+
return np.nonzero((measurements > limit_min) & (measurements < limit_max))
9586

9687
@staticmethod
97-
def prepare_stats_txt(times_between):
98-
stats_text = "Max time: "
99-
stats_text += "{0:.3f}".format(max(times_between)) + "ms\n"
100-
stats_text += "Min time: "
101-
stats_text += "{0:.3f}".format(min(times_between)) + "ms\n"
102-
stats_text += "Mean time: "
103-
stats_text += "{0:.3f}".format(np.mean(times_between)) + "ms\n"
104-
stats_text += "Std dev of time: "
105-
stats_text += "{0:.3f}".format(np.std(times_between)) + "ms\n"
106-
stats_text += "Median time: "
107-
stats_text += "{0:.3f}".format(np.median(times_between)) + "ms\n"
108-
stats_text += "Number of records: {}".format(len(times_between)) + "\n"
109-
110-
return stats_text
111-
112-
def time_between_events(self, start_event_name, start_event_state,
113-
end_event_name, end_event_state, hist_bin_width=0.01,
114-
start_meas=0, end_meas=float('inf')):
115-
self.logger.info("Stats calculating: {}->{}".format(start_event_name,
116-
end_event_name))
117-
118-
start_times = self._get_timestamps(start_event_name, start_event_state,
119-
start_meas, end_meas)
120-
end_times = self._get_timestamps(end_event_name, end_event_state,
121-
start_meas, end_meas)
122-
123-
if start_times is None or end_times is None:
124-
return
125-
126-
if len(start_times) == 0:
127-
self.logger.error("No events logged: " + start_event_name)
128-
return
129-
130-
if len(end_times) == 0:
131-
self.logger.error("No events logged: " + end_event_name)
132-
return
133-
134-
if len(start_times) != len(end_times):
135-
self.logger.error("Number of start_times and end_times is not equal")
136-
self.logger.error("Got {} start_times and {} end_times".format(
137-
len(start_times), len(end_times)))
138-
139-
return
140-
141-
times_between = self.calculate_times_between(start_times, end_times)
142-
stats_text = self.prepare_stats_txt(times_between)
143-
144-
plt.figure()
145-
146-
ax = plt.gca()
147-
ax.text(0.05,
148-
0.95,
149-
stats_text,
150-
transform=ax.transAxes,
151-
fontsize=12,
152-
verticalalignment='top',
153-
bbox=dict(boxstyle='round',
154-
alpha=0.5,
155-
facecolor='linen'))
156-
157-
plt.xlabel('Duration[ms]')
158-
plt.ylabel('Number of occurrences')
159-
160-
event_status_str = {
161-
EventState.SUBMIT : "submission",
162-
EventState.PROC_START : "processing start",
163-
EventState.PROC_END : "processing end"
88+
def _times_between_to_stats_txt(preset_desc, output_dir, times_between_ms, out_filename="stats",
89+
logger=None):
90+
stats = {
91+
"Max" : "{:.3f} ms".format(max(times_between_ms)),
92+
"Min" : "{:.3f} ms".format(min(times_between_ms)),
93+
"Mean" : "{:.3f} ms".format(np.mean(times_between_ms)),
94+
"Std dev" : "{:.3f} ms".format(np.std(times_between_ms)),
95+
"Median" : "{:.3f} ms".format(np.median(times_between_ms)),
96+
"Number of events" : str(len(times_between_ms))
16497
}
16598

166-
title = "From " + start_event_name + ' ' + \
167-
event_status_str[start_event_state] + "\nto " + \
168-
end_event_name + ' ' + event_status_str[end_event_state] + \
169-
' (' + self.data_name + ')'
170-
plt.title(title)
171-
plt.hist(times_between, bins = (int)((max(times_between) - min(times_between))
172-
/ hist_bin_width))
99+
key_len_max = max(len(k) for k in stats)
100+
val_len_max = max(len(v) for v in stats.values())
101+
102+
out_filepath = os.path.join(output_dir, "{}.txt".format(out_filename))
173103

174-
plt.yscale('log')
104+
with open(out_filepath, "w") as f:
105+
f.write("Event propagation times statistics - {}\n".format(preset_desc))
106+
for s in stats:
107+
f.write("{} {}\n".format(s.ljust(key_len_max), stats[s].rjust(val_len_max)))
108+
109+
if logger is not None:
110+
with open(out_filepath, "r") as f:
111+
logger.info(f.read())
112+
113+
@staticmethod
114+
def _times_between_to_histogram(preset_desc, output_dir, times_between_ms,
115+
out_filename="histogram"):
116+
bin_width_ms = 0.01
117+
118+
plt.xlabel("Time between events [ms]")
119+
plt.ylabel("Number of occurrences")
120+
plt.title(preset_desc)
175121
plt.grid(True)
176122

177-
if end_meas == float('inf'):
178-
end_meas_string = 'inf'
123+
plt.hist(times_between_ms, bins=np.arange(min(times_between_ms),
124+
max(times_between_ms) + bin_width_ms,
125+
bin_width_ms))
126+
127+
plt.savefig(os.path.join(output_dir, "{}.png".format(out_filename)))
128+
129+
# Save the same plot in log scale
130+
plt.yscale("log")
131+
plt.savefig(os.path.join(output_dir, "{}_log.png".format(out_filename)))
132+
133+
@staticmethod
134+
def _test_preset_parse_event(event_dict):
135+
if event_dict is None:
136+
return None, None
137+
138+
name = event_dict.get("name")
139+
state = event_dict.get("state")
140+
141+
if state is not None:
142+
state = EventState(state)
179143
else:
180-
end_meas_string = int(end_meas)
181-
dir_name = "{}{}_{}_{}/".format(OUTPUT_FOLDER, self.data_name,
182-
int(start_meas), end_meas_string)
183-
if not os.path.exists(dir_name):
184-
os.makedirs(dir_name)
185-
186-
plt.savefig(dir_name +
187-
title.lower().replace(' ', '_').replace('\n', '_') +'.png')
144+
# Assume "submit" event state by default
145+
state = EventState.SUBMIT
146+
147+
return name, state
148+
149+
def _test_preset_execute(self, test_preset, start_time, end_time):
150+
# Load preset description
151+
preset_name = test_preset.get("name")
152+
start_evt_name, start_evt_state = \
153+
StatsNordic._test_preset_parse_event(test_preset.get("start_event"))
154+
end_evt_name, end_evt_state = \
155+
StatsNordic._test_preset_parse_event(test_preset.get("end_event"))
156+
157+
# Validate preset description
158+
if preset_name is None:
159+
raise ValueError("Invalid preset: No preset name")
160+
if start_evt_name is None:
161+
raise ValueError("Invalid preset: No start event name ({})".format(preset_name))
162+
if end_evt_name is None:
163+
raise ValueError("Invalid preset: No end event name ({})".format(preset_name))
164+
165+
preset_desc = "{}: {}({})->{}({})".format(preset_name, start_evt_name, start_evt_state,
166+
end_evt_name, end_evt_state)
167+
self.logger.info("Execute test preset {}".format(preset_desc))
168+
169+
# Calculate event propagation times
170+
ts_start = self._get_timestamps(start_evt_name, start_evt_state, start_time, end_time)
171+
ts_end = self._get_timestamps(end_evt_name, end_evt_state, start_time, end_time)
172+
times_between_ms, ts_start, ts_end = self._calculate_times_between_ms(ts_start, ts_end)
173+
174+
# Prepare output directory
175+
output_dir1 = "{}_from_{}_to_{}".format(self.dataset_name, str(start_time), str(end_time))
176+
output_dir1 = output_dir1.replace(".", "_")
177+
178+
output_dir2 = preset_name.lower().replace(" ", "_").replace(".", "_")
179+
180+
output_dir = os.path.join(OUTPUT_DIR_BASE, output_dir1, output_dir2)
181+
os.makedirs(output_dir, exist_ok=True)
182+
183+
# Filter out outliers
184+
mask = StatsNordic._get_outlier_filter_mask(times_between_ms)
185+
times_between_ms_no_outliers = times_between_ms[mask]
186+
187+
# Store results in the output directory
188+
StatsNordic._times_between_to_stats_txt(preset_desc, output_dir, times_between_ms,
189+
logger=self.logger)
190+
StatsNordic._times_between_to_histogram(preset_desc + " no outliers", output_dir,
191+
times_between_ms_no_outliers,
192+
out_filename="histogram_no_outliers")
193+
194+
# Display all of the figures to the user
195+
plt.show()
196+
197+
def calculate_stats(self, test_presets, start_time, end_time):
198+
for t in test_presets:
199+
try:
200+
self._test_preset_execute(t, start_time, end_time)
201+
except Exception as e:
202+
self.logger.warning("Exception: {}".format(e))

0 commit comments

Comments (0)