@@ -74,7 +74,7 @@ def is_ht_enabled():
74
74
return False
75
75
return False
76
76
except FileNotFoundError :
77
- print ('Impossible to check hyperthreading via lscpu' )
77
+ verbose_print ('Impossible to check hyperthreading via lscpu' )
78
78
return False
79
79
80
80
@@ -186,7 +186,7 @@ def is_ht_enabled():
186
186
verbose_print (f'{ algorithm } algorithm: { len (libs ) * len (cases )} case(s),'
187
187
f' { len (params_set ["dataset" ])} dataset(s)\n ' )
188
188
for dataset in params_set ['dataset' ]:
189
- if isinstance ( dataset ['training' ], dict ) :
189
+ if dataset ['source' ] in [ 'csv' , 'npy' ] :
190
190
paths = f'--file-X-train { dataset ["training" ]["x" ]} '
191
191
if 'y' in dataset ['training' ].keys ():
192
192
paths += f' --file-y-train { dataset ["training" ]["y" ]} '
@@ -198,7 +198,7 @@ def is_ht_enabled():
198
198
dataset_name = dataset ['name' ]
199
199
else :
200
200
dataset_name = 'unknown'
201
- elif dataset ['training' ]. startswith ( 'synth' ) :
201
+ elif dataset ['source' ] == 'synthetic' :
202
202
class GenerationArgs :
203
203
pass
204
204
gen_args = GenerationArgs ()
@@ -209,67 +209,65 @@ class GenerationArgs:
209
209
else :
210
210
gen_args .seed = 777
211
211
212
- dataset_params = dataset ['training' ].split ('_' )
213
- gen_args .task = dataset_params [1 ]
214
- gen_args .samples = int (dataset_params [2 ])
215
- gen_args .features = int (dataset_params [3 ])
216
- if gen_args .task in ['clsf' , 'kmeans' , 'blobs' ]:
217
- cls_num_for_file = '-' + dataset_params [4 ]
218
- gen_args .classes = int (dataset_params [4 ])
219
- gen_args .clusters = gen_args .classes
212
+ gen_args .type = dataset ['type' ]
213
+ gen_args .samples = dataset ['training' ]['n_samples' ]
214
+ gen_args .features = dataset ['n_features' ]
215
+ if 'n_classes' in dataset .keys ():
216
+ gen_args .classes = dataset ['n_classes' ]
217
+ cls_num_for_file = f'-{ dataset ["n_classes" ]} '
218
+ elif 'n_clusters' in dataset .keys ():
219
+ gen_args .clusters = dataset ['n_clusters' ]
220
+ cls_num_for_file = f'-{ dataset ["n_clusters" ]} '
220
221
else :
221
222
cls_num_for_file = ''
222
223
223
- file_prefix = f'data/synth -{ gen_args .task } { cls_num_for_file } -'
224
+ file_prefix = f'data/synthetic -{ gen_args .type } { cls_num_for_file } -'
224
225
file_postfix = f'-{ gen_args .samples } x{ gen_args .features } .npy'
225
226
226
- if gen_args .task == 'kmeans' :
227
+ if gen_args .type == 'kmeans' :
227
228
gen_args .node_id = 0
228
229
gen_args .filei = f'{ file_prefix } init{ file_postfix } '
229
230
paths += f'--filei { gen_args .filei } '
230
231
gen_args .filet = f'{ file_prefix } threshold{ file_postfix } '
231
232
232
233
gen_args .filex = f'{ file_prefix } X-train{ file_postfix } '
233
234
paths += f' --file-X-train { gen_args .filex } '
234
- if gen_args .task not in ['kmeans' , 'blobs' ]:
235
+ if gen_args .type not in ['kmeans' , 'blobs' ]:
235
236
gen_args .filey = f'{ file_prefix } y-train{ file_postfix } '
236
237
paths += f' --file-y-train { gen_args .filey } '
237
238
238
239
if 'testing' in dataset .keys ():
239
- dataset_params = dataset ['testing' ].split ('_' )
240
- gen_args .test_samples = int (dataset_params [2 ])
240
+ gen_args .test_samples = dataset ['testing' ]['n_samples' ]
241
241
gen_args .filextest = f'{ file_prefix } X-test{ file_postfix } '
242
242
paths += f' --file-X-test { gen_args .filextest } '
243
- if gen_args .task not in ['kmeans' , 'blobs' ]:
243
+ if gen_args .type not in ['kmeans' , 'blobs' ]:
244
244
gen_args .fileytest = f'{ file_prefix } y-test{ file_postfix } '
245
245
paths += f' --file-y-test { gen_args .fileytest } '
246
246
else :
247
247
gen_args .test_samples = 0
248
248
gen_args .filextest = gen_args .filex
249
- if gen_args .task not in ['kmeans' , 'blobs' ]:
249
+ if gen_args .type not in ['kmeans' , 'blobs' ]:
250
250
gen_args .fileytest = gen_args .filey
251
251
252
252
if not args .dummy_run and not os .path .isfile (gen_args .filex ):
253
- if gen_args .task == 'reg ' :
253
+ if gen_args .type == 'regression ' :
254
254
gen_regression (gen_args )
255
- elif gen_args .task == 'clsf ' :
255
+ elif gen_args .type == 'classification ' :
256
256
gen_classification (gen_args )
257
- elif gen_args .task == 'kmeans' :
257
+ elif gen_args .type == 'kmeans' :
258
258
gen_kmeans (gen_args )
259
- elif gen_args .task == 'blobs' :
259
+ elif gen_args .type == 'blobs' :
260
260
gen_blobs (gen_args )
261
- dataset_name = f'synthetic_{ gen_args .task } '
261
+ dataset_name = f'synthetic_{ gen_args .type } '
262
262
else :
263
263
raise ValueError (
264
- 'Unknown dataset. Only synthetics datasets '
264
+ 'Unknown dataset source . Only synthetics datasets '
265
265
'and csv/npy files are supported now' )
266
266
for lib in libs :
267
+ env = os .environ .copy ()
267
268
if lib == 'xgboost' :
268
269
env ['OMP_NUM_THREADS' ] = omp_num_threads
269
270
env ['OMP_PLACES' ] = omp_places
270
- else :
271
- env ['OMP_NUM_THREADS' ] = ''
272
- env ['OMP_PLACES' ] = ''
273
271
274
272
for i , case in enumerate (cases ):
275
273
command = f'python { lib } /{ algorithm } .py --batch { batch } ' \
0 commit comments