 )
 
 
-def verbose_print(text):
+def verbose_print(text, **kwargs):
     global verbose_mode
     if verbose_mode:
-        print(text)
+        print(text, **kwargs)
 
 
 def filter_stderr(text):
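A note on the `**kwargs` forwarding above: it lets call sites pass any `print()` keyword (such as `end=` or `flush=`) through `verbose_print` unchanged. A minimal, self-contained sketch of the resulting behaviour (the call arguments below are illustrative, not from the repository):

```python
verbose_mode = True  # set from args.verbose in the runner

def verbose_print(text, **kwargs):
    # Forward any print() keyword arguments when verbose mode is on.
    global verbose_mode
    if verbose_mode:
        print(text, **kwargs)

verbose_print('generating dataset...', end=' ', flush=True)
verbose_print('done')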
@@ -97,9 +97,9 @@ def is_ht_enabled():
 
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--config', metavar='ConfigPath',
-                    type=argparse.FileType('r'), default='config_example.json',
-                    help='Path to configuration file')
+parser.add_argument('--configs', metavar='ConfigPath', type=str,
+                    default='configs/config_example.json',
+                    help='Path to configuration files')
 parser.add_argument('--dummy-run', default=False, action='store_true',
                     help='Run configuration parser and datasets generation'
                          'without benchmarks running')
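The flag changes from an `argparse.FileType` to a plain string so that several configs can be passed as one comma-separated value. A minimal sketch of how the new flag parses (the script name and file names here are hypothetical):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--configs', metavar='ConfigPath', type=str,
                    default='configs/config_example.json',
                    help='Path to configuration files')

# e.g.  python runner.py --configs configs/a.json,configs/b.json
args = parser.parse_args(['--configs', 'configs/a.json,configs/b.json'])
for config_name in args.configs.split(','):
    print(config_name)  # configs/a.json, then configs/b.json
```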
@@ -112,12 +112,6 @@ def is_ht_enabled():
 env = os.environ.copy()
 verbose_mode = args.verbose
 
-with open(args.config.name, 'r') as config_file:
-    config = json.load(config_file)
-
-if 'omp_env' not in config.keys():
-    config['omp_env'] = []
-
 # make directory for data if it doesn't exist
 os.makedirs('data', exist_ok=True)
 
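With this hunk, nothing is loaded at startup anymore; each file listed in `--configs` is opened inside the main loop (next hunk). Judging from the keys that loop accesses (`common`, `cases`, an optional `omp_env`, and the per-dataset fields), the expected config shape looks roughly like the sketch below; all values are illustrative, not taken from the repository:

```python
import json

config = json.loads('''
{
    "omp_env": ["OMP_NUM_THREADS"],
    "common": {"data-format": "pandas"},
    "cases": [
        {
            "algorithm": "kmeans",
            "lib": ["sklearn"],
            "dataset": [
                {
                    "source": "synthetic",
                    "type": "kmeans",
                    "n_clusters": 10,
                    "n_features": 50,
                    "training": {"n_samples": 100000}
                }
            ]
        }
    ]
}
''')
# The runner defaults 'omp_env' to [] when the key is absent.
config.setdefault('omp_env', [])
```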
@@ -194,130 +188,137 @@ def is_ht_enabled():
     'OMP_NUM_THREADS': omp_num_threads
 }
 
-# get parameters that are common for all cases
-common_params = config['common']
-for params_set in config['cases']:
-    cases = ['']
-    params = common_params.copy()
-    params.update(params_set.copy())
-    algorithm = params['algorithm']
-    libs = params['lib']
-    del params['dataset'], params['algorithm'], params['lib']
-    generate_cases(params)
-    verbose_print(f'{algorithm} algorithm: {len(libs) * len(cases)} case(s),'
-                  f' {len(params_set["dataset"])} dataset(s)\n')
-
-    for dataset in params_set['dataset']:
-        if dataset['source'] in ['csv', 'npy']:
-            paths = f'--file-X-train {dataset["training"]["x"]}'
-            if 'y' in dataset['training'].keys():
-                paths += f' --file-y-train {dataset["training"]["y"]}'
-            if 'testing' in dataset.keys():
-                paths += f' --file-X-test {dataset["testing"]["x"]}'
-                if 'y' in dataset['testing'].keys():
-                    paths += f' --file-y-test {dataset["testing"]["y"]}'
-            if 'name' in dataset.keys():
-                dataset_name = dataset['name']
-            else:
-                dataset_name = 'unknown'
-        elif dataset['source'] == 'synthetic':
-            class GenerationArgs:
-                pass
-            gen_args = GenerationArgs()
-            paths = ''
-
-            if 'seed' in params_set.keys():
-                gen_args.seed = params_set['seed']
-            else:
-                gen_args.seed = 777
-
-            # default values
-            gen_args.clusters = 10
-            gen_args.type = dataset['type']
-            gen_args.samples = dataset['training']['n_samples']
-            gen_args.features = dataset['n_features']
-            if 'n_classes' in dataset.keys():
-                gen_args.classes = dataset['n_classes']
-                cls_num_for_file = f'-{dataset["n_classes"]}'
-            elif 'n_clusters' in dataset.keys():
-                gen_args.clusters = dataset['n_clusters']
-                cls_num_for_file = f'-{dataset["n_clusters"]}'
-            else:
-                cls_num_for_file = ''
-
-            file_prefix = f'data/synthetic-{gen_args.type}{cls_num_for_file}-'
-            file_postfix = f'-{gen_args.samples}x{gen_args.features}.npy'
-
-            if gen_args.type == 'kmeans':
-                gen_args.node_id = 0
-                gen_args.filei = f'{file_prefix}init{file_postfix}'
-                paths += f'--filei {gen_args.filei}'
-                gen_args.filet = f'{file_prefix}threshold{file_postfix}'
-
-            gen_args.filex = f'{file_prefix}X-train{file_postfix}'
-            paths += f' --file-X-train {gen_args.filex}'
-            if gen_args.type not in ['kmeans', 'blobs']:
-                gen_args.filey = f'{file_prefix}y-train{file_postfix}'
-                paths += f' --file-y-train {gen_args.filey}'
-
-            if 'testing' in dataset.keys():
-                gen_args.test_samples = dataset['testing']['n_samples']
-                gen_args.filextest = f'{file_prefix}X-test{file_postfix}'
-                paths += f' --file-X-test {gen_args.filextest}'
+for config_name in args.configs.split(','):
+    verbose_print(f'Config: {config_name}')
+    with open(config_name, 'r') as config_file:
+        config = json.load(config_file)
+
+    if 'omp_env' not in config.keys():
+        config['omp_env'] = []
+    # get parameters that are common for all cases
+    common_params = config['common']
+    for params_set in config['cases']:
+        cases = ['']
+        params = common_params.copy()
+        params.update(params_set.copy())
+        algorithm = params['algorithm']
+        libs = params['lib']
+        del params['dataset'], params['algorithm'], params['lib']
+        generate_cases(params)
+        verbose_print(f'{algorithm} algorithm: {len(libs) * len(cases)} case(s),'
+                      f' {len(params_set["dataset"])} dataset(s)\n')
+
+        for dataset in params_set['dataset']:
+            if dataset['source'] in ['csv', 'npy']:
+                paths = f'--file-X-train {dataset["training"]["x"]}'
+                if 'y' in dataset['training'].keys():
+                    paths += f' --file-y-train {dataset["training"]["y"]}'
+                if 'testing' in dataset.keys():
+                    paths += f' --file-X-test {dataset["testing"]["x"]}'
+                    if 'y' in dataset['testing'].keys():
+                        paths += f' --file-y-test {dataset["testing"]["y"]}'
+                if 'name' in dataset.keys():
+                    dataset_name = dataset['name']
+                else:
+                    dataset_name = 'unknown'
+            elif dataset['source'] == 'synthetic':
+                class GenerationArgs:
+                    pass
+                gen_args = GenerationArgs()
+                paths = ''
+
+                if 'seed' in params_set.keys():
+                    gen_args.seed = params_set['seed']
+                else:
+                    gen_args.seed = 777
+
+                # default values
+                gen_args.clusters = 10
+                gen_args.type = dataset['type']
+                gen_args.samples = dataset['training']['n_samples']
+                gen_args.features = dataset['n_features']
+                if 'n_classes' in dataset.keys():
+                    gen_args.classes = dataset['n_classes']
+                    cls_num_for_file = f'-{dataset["n_classes"]}'
+                elif 'n_clusters' in dataset.keys():
+                    gen_args.clusters = dataset['n_clusters']
+                    cls_num_for_file = f'-{dataset["n_clusters"]}'
+                else:
+                    cls_num_for_file = ''
+
+                file_prefix = f'data/synthetic-{gen_args.type}{cls_num_for_file}-'
+                file_postfix = f'-{gen_args.samples}x{gen_args.features}.npy'
+
+                if gen_args.type == 'kmeans':
+                    gen_args.node_id = 0
+                    gen_args.filei = f'{file_prefix}init{file_postfix}'
+                    paths += f'--filei {gen_args.filei}'
+                    gen_args.filet = f'{file_prefix}threshold{file_postfix}'
+
+                gen_args.filex = f'{file_prefix}X-train{file_postfix}'
+                paths += f' --file-X-train {gen_args.filex}'
                 if gen_args.type not in ['kmeans', 'blobs']:
-                    gen_args.fileytest = f'{file_prefix}y-test{file_postfix}'
-                    paths += f' --file-y-test {gen_args.fileytest}'
+                    gen_args.filey = f'{file_prefix}y-train{file_postfix}'
+                    paths += f' --file-y-train {gen_args.filey}'
+
+                if 'testing' in dataset.keys():
+                    gen_args.test_samples = dataset['testing']['n_samples']
+                    gen_args.filextest = f'{file_prefix}X-test{file_postfix}'
+                    paths += f' --file-X-test {gen_args.filextest}'
+                    if gen_args.type not in ['kmeans', 'blobs']:
+                        gen_args.fileytest = f'{file_prefix}y-test{file_postfix}'
+                        paths += f' --file-y-test {gen_args.fileytest}'
+                else:
+                    gen_args.test_samples = 0
+                    gen_args.filextest = gen_args.filex
+                    if gen_args.type not in ['kmeans', 'blobs']:
+                        gen_args.fileytest = gen_args.filey
+
+                if not args.dummy_run and not os.path.isfile(gen_args.filex):
+                    if gen_args.type == 'regression':
+                        gen_regression(gen_args)
+                    elif gen_args.type == 'classification':
+                        gen_classification(gen_args)
+                    elif gen_args.type == 'kmeans':
+                        gen_kmeans(gen_args)
+                    elif gen_args.type == 'blobs':
+                        gen_blobs(gen_args)
+                dataset_name = f'synthetic_{gen_args.type}'
             else:
-                gen_args.test_samples = 0
-                gen_args.filextest = gen_args.filex
-                if gen_args.type not in ['kmeans', 'blobs']:
-                    gen_args.fileytest = gen_args.filey
-
-            if not args.dummy_run and not os.path.isfile(gen_args.filex):
-                if gen_args.type == 'regression':
-                    gen_regression(gen_args)
-                elif gen_args.type == 'classification':
-                    gen_classification(gen_args)
-                elif gen_args.type == 'kmeans':
-                    gen_kmeans(gen_args)
-                elif gen_args.type == 'blobs':
-                    gen_blobs(gen_args)
-            dataset_name = f'synthetic_{gen_args.type}'
-        else:
-            raise ValueError(
-                'Unknown dataset source. Only synthetics datasets '
-                'and csv/npy files are supported now')
-        for lib in libs:
-            env = os.environ.copy()
-            if lib == 'xgboost':
-                for var in config['omp_env']:
-                    env[var] = omp_env[var]
-            for i, case in enumerate(cases):
-                command = f'python {lib}/{algorithm}.py --batch {batch} ' \
-                          + f'--arch {hostname} --header --output-format ' \
-                          + f'{args.output_format} {case} {paths} ' \
-                          + f'--dataset-name {dataset_name}'
-                while '  ' in command:
-                    command = command.replace('  ', ' ')
-                verbose_print(command)
-                if not args.dummy_run:
-                    case = f'{lib},{algorithm}' + case
-                    stdout, stderr = read_output_from_command(command)
-                    stdout, extra_stdout = filter_stdout(stdout)
-                    stderr = filter_stderr(stderr)
-                    if extra_stdout != '':
-                        stderr += f'CASE {case} EXTRA OUTPUT:\n' \
-                            + f'{extra_stdout}\n'
-                    if args.output_format == 'json':
-                        try:
-                            json_result['results'].extend(json.loads(stdout))
-                        except json.JSONDecodeError as decoding_exception:
-                            stderr += f'CASE {case} JSON DECODING ERROR:\n' \
-                                + f'{decoding_exception}\n{stdout}\n'
-                    elif args.output_format == 'csv':
-                        csv_result += stdout + '\n'
-                    if stderr != '':
-                        print(stderr, file=sys.stderr)
+                raise ValueError(
+                    'Unknown dataset source. Only synthetics datasets '
+                    'and csv/npy files are supported now')
+            for lib in libs:
+                env = os.environ.copy()
+                if lib == 'xgboost':
+                    for var in config['omp_env']:
+                        env[var] = omp_env[var]
+                for i, case in enumerate(cases):
+                    command = f'python {lib}/{algorithm}.py --batch {batch} ' \
+                              + f'--arch {hostname} --header --output-format ' \
+                              + f'{args.output_format} {case} {paths} ' \
+                              + f'--dataset-name {dataset_name}'
+                    while '  ' in command:
+                        command = command.replace('  ', ' ')
+                    verbose_print(command)
+                    if not args.dummy_run:
+                        case = f'{lib},{algorithm}' + case
+                        stdout, stderr = read_output_from_command(command)
+                        stdout, extra_stdout = filter_stdout(stdout)
+                        stderr = filter_stderr(stderr)
+                        if extra_stdout != '':
+                            stderr += f'CASE {case} EXTRA OUTPUT:\n' \
+                                + f'{extra_stdout}\n'
+                        if args.output_format == 'json':
+                            try:
+                                json_result['results'].extend(json.loads(stdout))
+                            except json.JSONDecodeError as decoding_exception:
+                                stderr += f'CASE {case} JSON DECODING ERROR:\n' \
+                                    + f'{decoding_exception}\n{stdout}\n'
+                        elif args.output_format == 'csv':
+                            csv_result += stdout + '\n'
+                        if stderr != '':
+                            print(stderr, file=sys.stderr)
 
 if args.output_format == 'json':
     json_result = json.dumps(json_result, indent=4)
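One detail worth calling out in the command construction (unchanged by this diff apart from indentation): an empty `{case}` or `{paths}` substitution leaves double spaces in the command string, and the `while` loop squeezes them back down to single spaces. A standalone sketch of that idiom, with a hypothetical command:

```python
# '{case}' and '{paths}' may be empty, leaving runs of spaces behind.
command = 'python sklearn/kmeans.py --batch 0  --arch host  --header'
while '  ' in command:
    command = command.replace('  ', ' ')
print(command)  # python sklearn/kmeans.py --batch 0 --arch host --header
```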