Skip to content

Commit 9d1988c

Browse files
authored
Report generator (#38)
1 parent 3d91028 commit 9d1988c

File tree

3 files changed

+242
-0
lines changed

3 files changed

+242
-0
lines changed

report_generator/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Report generator for scikit-learn_bench
2+
3+
The report generator produces an Excel spreadsheet file from JSON benchmark log files.
4+
5+
Run `python report_generator.py --result-files bench_log_1.json,bench_log_2.json [--report-file new_report.xlsx --generation-config gen_config.json --merging none]` to launch report generation.
6+
7+
runner options:
8+
* ``result-files`` : comma-separated benchmark json result file paths
9+
* ``report-file`` : report file path
10+
* ``generation-config`` : generation configuration file path
11+
* ``merging`` : one of *full*, *none*, *sw_only*, *hw_only* (default: *none*); how to merge identical cases found in the benchmark logs
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"align": [
3+
"algorithm",
4+
"stage",
5+
"input_data:data_order",
6+
"input_data:data_type",
7+
"input_data:dataset_name",
8+
"input_data:rows",
9+
"input_data:columns",
10+
"input_data:classes",
11+
"input_data:n_clusters"
12+
],
13+
"diff": [
14+
"software_hash",
15+
"hardware_hash",
16+
"measurement_time"
17+
]
18+
}

report_generator/report_generator.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
import openpyxl
2+
import argparse
3+
import json
4+
import hashlib
5+
from string import ascii_uppercase
6+
import datetime
7+
8+
9+
def get_property(entry, prop):
    """Return the value found in ``entry`` under a colon-separated key path.

    ``prop`` (for example ``'input_data:rows'``) is split on ``':'`` and the
    pieces are used one after another to index one level deeper, starting
    from ``entry``. Any exception raised by the indexing (such as
    ``KeyError`` for an absent key) propagates to the caller.
    """
    node = entry
    for part in prop.split(':'):
        node = node[part]
    return node
15+
16+
17+
def result_entries_have_same_values(first_entry, second_entry, props, error_on_missing=True):
    """Tell whether two result entries agree on every property in ``props``.

    Each property is a colon-separated key path resolved with
    ``get_property``. The comparison stops at the first property whose
    values differ, so later properties are not looked up at all.

    Parameters:
        first_entry, second_entry: result entries (nested dicts) to compare.
        props: iterable of property paths to compare.
        error_on_missing: what to do when a property cannot be resolved in
            one of the entries. If true, ``KeyError`` is raised; if false,
            that property is skipped and does not make the entries differ.

    Returns:
        ``True`` when no compared property differs, ``False`` otherwise.

    Raises:
        KeyError: a property is missing and ``error_on_missing`` is true.
            The exception carries the full property path and is chained to
            the original lookup error.
    """
    for prop in props:
        try:
            if not (get_property(first_entry, prop) == get_property(second_entry, prop)):
                return False
        except KeyError as err:
            if error_on_missing:
                # Name the offending property instead of raising an empty KeyError.
                raise KeyError(prop) from err
    return True
26+
27+
28+
def result_entries_are_equal(first_entry, second_entry, config):
    """Check that two entries match on all 'align' and all 'diff' properties.

    The property lists are taken from ``config['align']`` and
    ``config['diff']``; a property missing from an entry is treated as an
    error by the underlying comparison (``error_on_missing`` is true).
    """
    return result_entries_have_same_values(
        first_entry,
        second_entry,
        config['align'] + config['diff'],
        True,
    )
31+
32+
33+
def result_entries_are_comparable(first_entry, second_entry, config):
    """Check that two entries match on the ``config['align']`` properties.

    Properties that cannot be resolved in an entry are skipped by the
    underlying comparison (``error_on_missing`` is false).
    """
    return result_entries_have_same_values(
        first_entry,
        second_entry,
        config['align'],
        False,
    )
36+
37+
38+
def result_entries_have_same_diff(first_entry, second_entry, config):
    """Check that two entries match on the ``config['diff']`` properties.

    Properties that cannot be resolved in an entry are skipped by the
    underlying comparison (``error_on_missing`` is false).
    """
    return result_entries_have_same_values(
        first_entry,
        second_entry,
        config['diff'],
        False,
    )
41+
42+
43+
def results_are_mergeable(first_res, second_res, merging):
    """Decide whether two benchmark logs may be merged under ``merging`` mode.

    ``'hw_only'`` requires equal ``'hardware_hash'`` values, ``'sw_only'``
    requires equal ``'software_hash'`` values, and any other mode requires
    both hashes to be equal. Both hashes are read from both arguments
    regardless of the mode.
    """
    same_hw = first_res['hardware_hash'] == second_res['hardware_hash']
    same_sw = first_res['software_hash'] == second_res['software_hash']
    if merging == 'hw_only':
        return same_hw
    if merging == 'sw_only':
        return same_sw
    return same_sw and same_hw
52+
53+
54+
# Excel column names in sheet order: 'A'..'Z' followed by 'AA'..'ZZ'
# (26 + 26 * 26 = 702 names; larger column indexes are not covered).
excel_header_columns = list(ascii_uppercase) + [
    first + second
    for first in ascii_uppercase
    for second in ascii_uppercase
]


def xy_to_excel_cell(x, y):
    """Convert zero-based (column, row) coordinates into an Excel cell name.

    ``x`` indexes ``excel_header_columns``; ``y`` is shifted by one because
    Excel rows are numbered from 1. For example ``(0, 0)`` gives ``'A1'``
    and ``(27, 4)`` gives ``'AB5'``.

    Raises:
        IndexError: ``x`` is outside the range of ``excel_header_columns``.
    """
    return '{}{}'.format(excel_header_columns[x], y + 1)
59+
60+
61+
def write_cell(work_sheet, x, y, value):
    """Store ``value`` in ``work_sheet`` at zero-based column ``x`` and row ``y``.

    The coordinates are translated to a cell name with ``xy_to_excel_cell``
    and the value is assigned through item assignment on the sheet.
    """
    cell_name = xy_to_excel_cell(x, y)
    work_sheet[cell_name] = value
63+
64+
65+
def create_list(res_entry, props_list):
    """Collect the values of ``props_list`` from ``res_entry`` into a list.

    Each property is a colon-separated key path resolved with
    ``get_property``. A property that cannot be resolved (a key on the path
    is absent, or an intermediate value is not subscriptable) yields an
    empty string, so the result always has one item per property.

    Returns:
        A list with the resolved values, in the order of ``props_list``.
    """
    line = []
    for prop in props_list:
        try:
            val = get_property(res_entry, prop)
        except (KeyError, TypeError):
            # Only lookup failures become blanks; unrelated errors
            # (e.g. KeyboardInterrupt) are no longer swallowed.
            val = ''
        line.append(val)
    return line
74+
75+
76+
# Command-line interface of the report generator.
parser = argparse.ArgumentParser()
# The help text must be passed as the ``help`` keyword: a positional argument
# after keyword arguments is a syntax error.
parser.add_argument('--result-files', type=str, required=True,
                    help='Benchmark result file names '
                         'separated by commas')
# By default the report is named after the current date.
parser.add_argument('--report-file', type=str,
                    default=f'report_{str(datetime.date.today())}.xlsx')
parser.add_argument('--generation-config', type=str,
                    default='default_report_gen_config.json')
parser.add_argument('--merging', type=str, default='none',
                    choices=('full', 'none', 'sw_only', 'hw_only'))
args = parser.parse_args()
87+
88+
# Parse every benchmark log named on the command line, keeping their order.
json_results = []
result_paths = args.result_files.split(',')
for result_path in result_paths:
    with open(result_path, 'r') as result_file:
        parsed_log = json.load(result_file)
    json_results.append(parsed_log)

# Parse the report generation configuration.
with open(args.generation_config, 'r') as config_file:
    gen_config = json.load(config_file)

# Workbook that receives all report sheets.
wb = openpyxl.Workbook()
97+
98+
# Fingerprint the 'software' and 'hardware' sections of every log: SHA-256 of
# the section's str() representation, truncated to HASH_LIMIT hex digits and
# stored in the log under '<section>_hash'.
HASH_LIMIT = 8
for json_res in json_results:
    for ware in ('software', 'hardware'):
        ware_bytes = str(json_res[ware]).encode('utf-8')
        full_digest = hashlib.sha256(ware_bytes).hexdigest()
        json_res[f'{ware}_hash'] = full_digest[:HASH_LIMIT]
105+
106+
# Flatten all logs into one list of result entries. Every entry is a copy of
# the original result extended with the top-level fields of its log
# (everything except 'results'); log-level fields win on key clashes.
all_res_entries = []
for json_res in json_results:
    log_level_info = json_res.copy()
    log_level_info.pop('results')
    all_res_entries.extend(
        {**res_entry, **log_level_info} for res_entry in json_res['results']
    )
115+
116+
# Merge duplicated result entries: among entries that are equal on all
# 'align' and 'diff' properties only one is kept, preferring the one with the
# greater 'measurement_time'.
# NOTE(review): any mode other than 'none' behaves the same here; the
# hw_only/sw_only distinction (results_are_mergeable) is not consulted.
if args.merging != 'none':
    for i in range(len(all_res_entries)):
        if all_res_entries[i] == {}:
            # already merged into an earlier entry
            continue
        # Pairs (j, i) with j < i were handled when j was the outer index.
        for j in range(i + 1, len(all_res_entries)):
            resi_entry = all_res_entries[i]
            resj_entry = all_res_entries[j]
            if resj_entry == {}:
                continue
            if result_entries_are_equal(resi_entry, resj_entry, gen_config):
                # Write back into the list: rebinding the loop variables, as
                # the code did before, left the list untouched and made
                # merging a no-op.
                if resi_entry['measurement_time'] < resj_entry['measurement_time']:
                    all_res_entries[i] = resj_entry
                all_res_entries[j] = {}

    # Drop the placeholders left by merged entries.
    all_res_entries = [entry for entry in all_res_entries if entry != {}]
129+
130+
# Keep one representative entry (a copy) for every distinct combination of
# 'diff' property values, in order of first appearance.
diff_combinations = []
for res_entry in all_res_entries:
    seen_before = any(
        result_entries_have_same_diff(res_entry, known_comb, gen_config)
        for known_comb in diff_combinations
    )
    if not seen_before:
        diff_combinations.append(res_entry.copy())

# Same for every distinct combination of 'align' property values.
align_combinations = []
for res_entry in all_res_entries:
    seen_before = any(
        result_entries_are_comparable(res_entry, known_comb, gen_config)
        for known_comb in align_combinations
    )
    if not seen_before:
        align_combinations.append(res_entry.copy())
151+
152+
# Layout of a result sheet: the first HEAD_OFFSET rows hold the 'diff'
# properties of every column, the first LEFT_OFFSET columns hold the 'align'
# properties of every row, and the measured times fill the rest.
HEAD_OFFSET = len(gen_config['diff'])
LEFT_OFFSET = len(gen_config['align'])

# Sheet name suffix -> values of an entry's 'stage' that belong to that sheet.
stages_splitter = {
    'training': ['training', 'computation'],
    'inference': ['prediction', 'transformation', 'search']
}

for stage_key, stage_names in stages_splitter.items():
    ws = wb.create_sheet(title=f'Results ({stage_key})')

    # Header row: names of the 'align' properties followed by the time label.
    for i, col in enumerate(gen_config['align'] + ['time, s']):
        write_cell(ws, i, HEAD_OFFSET, col)

    # Names of the 'diff' properties, one per header row, placed in the last
    # 'align' column.
    for i, row in enumerate(gen_config['diff']):
        write_cell(ws, LEFT_OFFSET - 1, i, row)

    # Build a filtered list rather than calling remove() while iterating:
    # removing during iteration skipped the element after every removed one,
    # which left rows of the wrong stage in the sheet.
    stage_align_combinations = [
        align_comb for align_comb in align_combinations
        if align_comb['stage'] in stage_names
    ]

    # Row labels: 'align' property values of every case of this stage.
    for i, align_comb in enumerate(stage_align_combinations):
        arr = create_list(align_comb, gen_config['align'])
        for j, el in enumerate(arr):
            write_cell(ws, j, HEAD_OFFSET + 1 + i, el)

    # Column labels: 'diff' property values of every configuration.
    for i, diff_comb in enumerate(diff_combinations):
        arr = create_list(diff_comb, gen_config['diff'])
        for j, el in enumerate(arr):
            write_cell(ws, LEFT_OFFSET + i, j, el)

    # Put every measured time at the crossing of its row and column.
    for res_entry in all_res_entries:
        if res_entry['stage'] not in stage_names:
            continue
        x, y = None, None
        for j, align_comb in enumerate(stage_align_combinations):
            if result_entries_are_comparable(res_entry, align_comb, gen_config):
                y = j
                break
        for j, diff_comb in enumerate(diff_combinations):
            if result_entries_have_same_diff(res_entry, diff_comb, gen_config):
                x = j
                break
        write_cell(ws, LEFT_OFFSET + x, HEAD_OFFSET + 1 + y, res_entry['time[s]'])
198+
199+
# write configs
# Every log gets two extra sheets, one with its software and one with its
# hardware configuration: a title line in the first cell followed by the
# pretty-printed JSON of the section, one text line per row.
for i, json_res in enumerate(json_results):
    for ware, short_name in (('software', 'SW'), ('hardware', 'HW')):
        # Read the hash into a variable first: reusing the enclosing quote
        # character inside an f-string is a syntax error before Python 3.12.
        ware_hash = json_res[f'{ware}_hash']
        ws = wb.create_sheet(title=f'{short_name} config n{i}_{ware_hash}')
        ws[xy_to_excel_cell(0, 0)] = \
            f'{ware.capitalize()} configuration {i} (hash: {ware_hash})'
        conf_lines = json.dumps(json_res[ware], indent=4).split('\n')
        for j, conf_line in enumerate(conf_lines):
            ws[xy_to_excel_cell(0, 1 + j)] = conf_line

wb.save(args.report_file)

0 commit comments

Comments
 (0)