
Commit 1fe39ad

Multi config support for runner

1 parent 2432ead commit 1fe39ad

File tree

1 file changed: +134 -133 lines changed

runner.py

Lines changed: 134 additions & 133 deletions
@@ -17,10 +17,10 @@
 )


-def verbose_print(text):
+def verbose_print(text, **kwargs):
     global verbose_mode
     if verbose_mode:
-        print(text)
+        print(text, **kwargs)


 def filter_stderr(text):
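
For context on the hunk above: forwarding **kwargs lets call sites pass print() options such as end or flush through the verbosity gate. A minimal sketch of the new behavior (the example calls are illustrative, not from this commit):

    verbose_mode = True

    def verbose_print(text, **kwargs):
        global verbose_mode
        if verbose_mode:
            print(text, **kwargs)

    # keyword arguments now reach print() unchanged
    verbose_print('running...', end='', flush=True)  # stays on one line
    verbose_print(' done')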
@@ -97,9 +97,9 @@ def is_ht_enabled():


 parser = argparse.ArgumentParser()
-parser.add_argument('--config', metavar='ConfigPath',
-                    type=argparse.FileType('r'), default='config_example.json',
-                    help='Path to configuration file')
+parser.add_argument('--configs', metavar='ConfigPath', type=str,
+                    default='configs/config_example.json',
+                    help='Path to configuration files')
 parser.add_argument('--dummy-run', default=False, action='store_true',
                     help='Run configuration parser and datasets generation'
                          'without benchmarks running')
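
A note on the design choice: argparse.FileType('r') opens a single file at parse time, so it cannot represent a comma-separated list; switching to a plain str defers opening to the new per-config loop. A minimal sketch of how the value is consumed (the config file names are hypothetical):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--configs', metavar='ConfigPath', type=str,
                        default='configs/config_example.json',
                        help='Path to configuration files')

    # equivalent to: python runner.py --configs configs/a.json,configs/b.json
    args = parser.parse_args(['--configs', 'configs/a.json,configs/b.json'])
    for config_name in args.configs.split(','):
        print(config_name)  # configs/a.json, then configs/b.json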
@@ -112,12 +112,6 @@ def is_ht_enabled():
 env = os.environ.copy()
 verbose_mode = args.verbose

-with open(args.config.name, 'r') as config_file:
-    config = json.load(config_file)
-
-if 'omp_env' not in config.keys():
-    config['omp_env'] = []
-
 # make directory for data if it doesn't exist
 os.makedirs('data', exist_ok=True)
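
The removed block is not dropped outright: reading the config and defaulting omp_env move inside the new per-config loop in the next hunk, so each file is loaded and normalized on its own iteration. As an aside, the explicit membership check is equivalent to dict.setdefault; a sketch with a hypothetical parsed config:

    config = {'common': {}, 'cases': []}  # hypothetical parsed config
    config.setdefault('omp_env', [])      # same effect as the 'omp_env' check
    assert config['omp_env'] == []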

@@ -194,130 +188,137 @@ def is_ht_enabled():
     'OMP_NUM_THREADS': omp_num_threads
 }

-# get parameters that are common for all cases
-common_params = config['common']
-for params_set in config['cases']:
-    cases = ['']
-    params = common_params.copy()
-    params.update(params_set.copy())
-    algorithm = params['algorithm']
-    libs = params['lib']
-    del params['dataset'], params['algorithm'], params['lib']
-    generate_cases(params)
-    verbose_print(f'{algorithm} algorithm: {len(libs) * len(cases)} case(s),'
-                  f' {len(params_set["dataset"])} dataset(s)\n')
-
-    for dataset in params_set['dataset']:
-        if dataset['source'] in ['csv', 'npy']:
-            paths = f'--file-X-train {dataset["training"]["x"]}'
-            if 'y' in dataset['training'].keys():
-                paths += f' --file-y-train {dataset["training"]["y"]}'
-            if 'testing' in dataset.keys():
-                paths += f' --file-X-test {dataset["testing"]["x"]}'
-                if 'y' in dataset['testing'].keys():
-                    paths += f' --file-y-test {dataset["testing"]["y"]}'
-            if 'name' in dataset.keys():
-                dataset_name = dataset['name']
-            else:
-                dataset_name = 'unknown'
-        elif dataset['source'] == 'synthetic':
-            class GenerationArgs:
-                pass
-            gen_args = GenerationArgs()
-            paths = ''
-
-            if 'seed' in params_set.keys():
-                gen_args.seed = params_set['seed']
-            else:
-                gen_args.seed = 777
-
-            # default values
-            gen_args.clusters = 10
-            gen_args.type = dataset['type']
-            gen_args.samples = dataset['training']['n_samples']
-            gen_args.features = dataset['n_features']
-            if 'n_classes' in dataset.keys():
-                gen_args.classes = dataset['n_classes']
-                cls_num_for_file = f'-{dataset["n_classes"]}'
-            elif 'n_clusters' in dataset.keys():
-                gen_args.clusters = dataset['n_clusters']
-                cls_num_for_file = f'-{dataset["n_clusters"]}'
-            else:
-                cls_num_for_file = ''
-
-            file_prefix = f'data/synthetic-{gen_args.type}{cls_num_for_file}-'
-            file_postfix = f'-{gen_args.samples}x{gen_args.features}.npy'
-
-            if gen_args.type == 'kmeans':
-                gen_args.node_id = 0
-                gen_args.filei = f'{file_prefix}init{file_postfix}'
-                paths += f'--filei {gen_args.filei}'
-                gen_args.filet = f'{file_prefix}threshold{file_postfix}'
-
-            gen_args.filex = f'{file_prefix}X-train{file_postfix}'
-            paths += f' --file-X-train {gen_args.filex}'
-            if gen_args.type not in ['kmeans', 'blobs']:
-                gen_args.filey = f'{file_prefix}y-train{file_postfix}'
-                paths += f' --file-y-train {gen_args.filey}'
-
-            if 'testing' in dataset.keys():
-                gen_args.test_samples = dataset['testing']['n_samples']
-                gen_args.filextest = f'{file_prefix}X-test{file_postfix}'
-                paths += f' --file-X-test {gen_args.filextest}'
+for config_name in args.configs.split(','):
+    verbose_print(f'Config: {config_name}')
+    with open(config_name, 'r') as config_file:
+        config = json.load(config_file)
+
+    if 'omp_env' not in config.keys():
+        config['omp_env'] = []
+    # get parameters that are common for all cases
+    common_params = config['common']
+    for params_set in config['cases']:
+        cases = ['']
+        params = common_params.copy()
+        params.update(params_set.copy())
+        algorithm = params['algorithm']
+        libs = params['lib']
+        del params['dataset'], params['algorithm'], params['lib']
+        generate_cases(params)
+        verbose_print(f'{algorithm} algorithm: {len(libs) * len(cases)} case(s),'
+                      f' {len(params_set["dataset"])} dataset(s)\n')
+
+        for dataset in params_set['dataset']:
+            if dataset['source'] in ['csv', 'npy']:
+                paths = f'--file-X-train {dataset["training"]["x"]}'
+                if 'y' in dataset['training'].keys():
+                    paths += f' --file-y-train {dataset["training"]["y"]}'
+                if 'testing' in dataset.keys():
+                    paths += f' --file-X-test {dataset["testing"]["x"]}'
+                    if 'y' in dataset['testing'].keys():
+                        paths += f' --file-y-test {dataset["testing"]["y"]}'
+                if 'name' in dataset.keys():
+                    dataset_name = dataset['name']
+                else:
+                    dataset_name = 'unknown'
+            elif dataset['source'] == 'synthetic':
+                class GenerationArgs:
+                    pass
+                gen_args = GenerationArgs()
+                paths = ''
+
+                if 'seed' in params_set.keys():
+                    gen_args.seed = params_set['seed']
+                else:
+                    gen_args.seed = 777
+
+                # default values
+                gen_args.clusters = 10
+                gen_args.type = dataset['type']
+                gen_args.samples = dataset['training']['n_samples']
+                gen_args.features = dataset['n_features']
+                if 'n_classes' in dataset.keys():
+                    gen_args.classes = dataset['n_classes']
+                    cls_num_for_file = f'-{dataset["n_classes"]}'
+                elif 'n_clusters' in dataset.keys():
+                    gen_args.clusters = dataset['n_clusters']
+                    cls_num_for_file = f'-{dataset["n_clusters"]}'
+                else:
+                    cls_num_for_file = ''
+
+                file_prefix = f'data/synthetic-{gen_args.type}{cls_num_for_file}-'
+                file_postfix = f'-{gen_args.samples}x{gen_args.features}.npy'
+
+                if gen_args.type == 'kmeans':
+                    gen_args.node_id = 0
+                    gen_args.filei = f'{file_prefix}init{file_postfix}'
+                    paths += f'--filei {gen_args.filei}'
+                    gen_args.filet = f'{file_prefix}threshold{file_postfix}'
+
+                gen_args.filex = f'{file_prefix}X-train{file_postfix}'
+                paths += f' --file-X-train {gen_args.filex}'
                 if gen_args.type not in ['kmeans', 'blobs']:
-                    gen_args.fileytest = f'{file_prefix}y-test{file_postfix}'
-                    paths += f' --file-y-test {gen_args.fileytest}'
+                    gen_args.filey = f'{file_prefix}y-train{file_postfix}'
+                    paths += f' --file-y-train {gen_args.filey}'
+
+                if 'testing' in dataset.keys():
+                    gen_args.test_samples = dataset['testing']['n_samples']
+                    gen_args.filextest = f'{file_prefix}X-test{file_postfix}'
+                    paths += f' --file-X-test {gen_args.filextest}'
+                    if gen_args.type not in ['kmeans', 'blobs']:
+                        gen_args.fileytest = f'{file_prefix}y-test{file_postfix}'
+                        paths += f' --file-y-test {gen_args.fileytest}'
+                else:
+                    gen_args.test_samples = 0
+                    gen_args.filextest = gen_args.filex
+                    if gen_args.type not in ['kmeans', 'blobs']:
+                        gen_args.fileytest = gen_args.filey
+
+                if not args.dummy_run and not os.path.isfile(gen_args.filex):
+                    if gen_args.type == 'regression':
+                        gen_regression(gen_args)
+                    elif gen_args.type == 'classification':
+                        gen_classification(gen_args)
+                    elif gen_args.type == 'kmeans':
+                        gen_kmeans(gen_args)
+                    elif gen_args.type == 'blobs':
+                        gen_blobs(gen_args)
+                dataset_name = f'synthetic_{gen_args.type}'
             else:
-                gen_args.test_samples = 0
-                gen_args.filextest = gen_args.filex
-                if gen_args.type not in ['kmeans', 'blobs']:
-                    gen_args.fileytest = gen_args.filey
-
-            if not args.dummy_run and not os.path.isfile(gen_args.filex):
-                if gen_args.type == 'regression':
-                    gen_regression(gen_args)
-                elif gen_args.type == 'classification':
-                    gen_classification(gen_args)
-                elif gen_args.type == 'kmeans':
-                    gen_kmeans(gen_args)
-                elif gen_args.type == 'blobs':
-                    gen_blobs(gen_args)
-            dataset_name = f'synthetic_{gen_args.type}'
-        else:
-            raise ValueError(
-                'Unknown dataset source. Only synthetics datasets '
-                'and csv/npy files are supported now')
-        for lib in libs:
-            env = os.environ.copy()
-            if lib == 'xgboost':
-                for var in config['omp_env']:
-                    env[var] = omp_env[var]
-            for i, case in enumerate(cases):
-                command = f'python {lib}/{algorithm}.py --batch {batch} ' \
-                    + f'--arch {hostname} --header --output-format ' \
-                    + f'{args.output_format}{case} {paths} ' \
-                    + f'--dataset-name {dataset_name}'
-                while '  ' in command:
-                    command = command.replace('  ', ' ')
-                verbose_print(command)
-                if not args.dummy_run:
-                    case = f'{lib},{algorithm} ' + case
-                    stdout, stderr = read_output_from_command(command)
-                    stdout, extra_stdout = filter_stdout(stdout)
-                    stderr = filter_stderr(stderr)
-                    if extra_stdout != '':
-                        stderr += f'CASE {case} EXTRA OUTPUT:\n' \
-                            + f'{extra_stdout}\n'
-                    if args.output_format == 'json':
-                        try:
-                            json_result['results'].extend(json.loads(stdout))
-                        except json.JSONDecodeError as decoding_exception:
-                            stderr += f'CASE {case} JSON DECODING ERROR:\n' \
-                                + f'{decoding_exception}\n{stdout}\n'
-                    elif args.output_format == 'csv':
-                        csv_result += stdout + '\n'
-                    if stderr != '':
-                        print(stderr, file=sys.stderr)
+                raise ValueError(
+                    'Unknown dataset source. Only synthetics datasets '
+                    'and csv/npy files are supported now')
+            for lib in libs:
+                env = os.environ.copy()
+                if lib == 'xgboost':
+                    for var in config['omp_env']:
+                        env[var] = omp_env[var]
+                for i, case in enumerate(cases):
+                    command = f'python {lib}/{algorithm}.py --batch {batch} ' \
+                        + f'--arch {hostname} --header --output-format ' \
+                        + f'{args.output_format}{case} {paths} ' \
+                        + f'--dataset-name {dataset_name}'
+                    while '  ' in command:
+                        command = command.replace('  ', ' ')
+                    verbose_print(command)
+                    if not args.dummy_run:
+                        case = f'{lib},{algorithm} ' + case
+                        stdout, stderr = read_output_from_command(command)
+                        stdout, extra_stdout = filter_stdout(stdout)
+                        stderr = filter_stderr(stderr)
+                        if extra_stdout != '':
+                            stderr += f'CASE {case} EXTRA OUTPUT:\n' \
+                                + f'{extra_stdout}\n'
+                        if args.output_format == 'json':
+                            try:
+                                json_result['results'].extend(json.loads(stdout))
+                            except json.JSONDecodeError as decoding_exception:
+                                stderr += f'CASE {case} JSON DECODING ERROR:\n' \
+                                    + f'{decoding_exception}\n{stdout}\n'
+                        elif args.output_format == 'csv':
+                            csv_result += stdout + '\n'
+                        if stderr != '':
+                            print(stderr, file=sys.stderr)

 if args.output_format == 'json':
     json_result = json.dumps(json_result, indent=4)
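
Taken together, the new control flow is an outer loop over config paths, with everything that previously ran once now running per config. A condensed sketch of the pattern (paths hypothetical; benchmark body elided):

    import json

    configs_arg = 'configs/a.json,configs/b.json'  # the --configs value

    for config_name in configs_arg.split(','):
        print(f'Config: {config_name}')
        with open(config_name, 'r') as config_file:
            config = json.load(config_file)
        if 'omp_env' not in config:
            config['omp_env'] = []
        for params_set in config['cases']:
            pass  # generate cases/datasets and run benchmarks, as in the diff

Note that json_result and csv_result are accumulated inside the loop but serialized after it, so results from every config end up in a single combined output.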
