2121 pass
2222
2323
def spawn_classifier(seed, dataset_name):
    """Run a single auto-sklearn instance; intended as a process target.

    auto-sklearn does not start worker processes on its own. The main block
    therefore launches this function several times, each call in a new
    process and each with its own seed.

    Parameters
    ----------
    seed
        Seed handed to ``AutoSklearnClassifier``. Every instance must use a
        different one; the instance with seed ``0`` is the only one that
        starts from meta-learning configurations.
    dataset_name
        Name forwarded to ``fit`` as ``dataset_name``.

    Notes
    -----
    ``tmp_folder``, ``output_folder``, ``X_train`` and ``y_train`` are read
    from names defined outside this function.
    """
    # Only one of the spawned processes (seed 0) is warm-started from
    # meta-learning; the others begin from a random incumbent so that the
    # processes do not all evaluate the same configurations.
    warm_start = seed == 0
    n_metalearning_configs = 25 if warm_start else 0
    scenario_overrides = (
        {} if warm_start else {'initial_incumbent': 'RANDOM'}
    )

    # What differs from an ordinary auto-sklearn run:
    #   1. every classifier writes to the same output directory,
    #   2. shared_mode=True lets the instances share data between models,
    #   3. every AutoSklearnClassifier instance gets a different seed.
    classifier = AutoSklearnClassifier(
        time_left_for_this_task=60,  # sec., budget for this seed's fit
        per_run_time_limit=15,  # sec., a model is killed after this long
        ml_memory_limit=1024,  # MB, limit for each call to a ML algorithm
        shared_mode=True,  # tmp folder is shared between seeds
        tmp_folder=tmp_folder,
        output_folder=output_folder,
        delete_tmp_folder_after_terminate=False,
        ensemble_size=0,  # ensembles are built once all runs have finished
        initial_configurations_via_metalearning=n_metalearning_configs,
        seed=seed,
        smac_scenario_args=scenario_overrides,
    )
    classifier.fit(X_train, y_train, dataset_name=dataset_name)
def _spawn_classifier(X_train, y_train, seed, dataset_name):
    """Fit one auto-sklearn instance; intended to run in its own process.

    auto-sklearn does not take care of spawning worker processes. The main
    block starts this function several times, each call in a new process
    and each with a different seed.

    The function lives at module level, not nested inside
    ``get_spawn_classifier``, so that ``multiprocessing`` can pickle it by
    name under the "spawn" and "forkserver" start methods. Its name
    deliberately differs from ``spawn_classifier``: the main block binds
    that name to the callable returned by ``get_spawn_classifier``, and
    pickling by name would break if the two collided.

    Parameters
    ----------
    X_train, y_train
        Training data passed straight to ``AutoSklearnClassifier.fit``.
    seed
        Seed for this instance. All instances must use different seeds;
        only the instance with seed ``0`` uses meta-learning.
    dataset_name
        Name forwarded to ``fit`` as ``dataset_name``.

    Notes
    -----
    ``tmp_folder`` and ``output_folder`` are read from names defined
    outside this function.
    """
    # Use the initial configurations from meta-learning only in one of the
    # spawned processes. This prevents auto-sklearn from evaluating the
    # same configurations in every process.
    if seed == 0:
        initial_configurations_via_metalearning = 25
        smac_scenario_args = {}
    else:
        initial_configurations_via_metalearning = 0
        smac_scenario_args = {'initial_incumbent': 'RANDOM'}

    # Arguments which are different to other runs of auto-sklearn:
    # 1. all classifiers write to the same output directory
    # 2. shared_mode is set to True, this enables sharing of data between
    #    models.
    # 3. all instances of the AutoSklearnClassifier must have a different
    #    seed!
    automl = AutoSklearnClassifier(
        time_left_for_this_task=60,  # sec., how long this seed's fit runs
        per_run_time_limit=15,  # sec., a model is killed after this long
        ml_memory_limit=1024,  # MB, limit for each call to a ML algorithm
        shared_mode=True,  # tmp folder will be shared between seeds
        tmp_folder=tmp_folder,
        output_folder=output_folder,
        delete_tmp_folder_after_terminate=False,
        ensemble_size=0,  # ensembles are built after all runs finished
        initial_configurations_via_metalearning=(
            initial_configurations_via_metalearning
        ),
        seed=seed,
        smac_scenario_args=smac_scenario_args,
    )
    automl.fit(X_train, y_train, dataset_name=dataset_name)


def get_spawn_classifier(X_train, y_train):
    """Return a ``callable(seed, dataset_name)`` bound to the training data.

    The result is meant to be used as the ``target`` of a
    ``multiprocessing.Process``. It is a ``functools.partial`` over the
    module-level ``_spawn_classifier`` rather than a nested closure: a
    closure cannot be pickled, so it fails as a process target under the
    "spawn" and "forkserver" start methods.

    Parameters
    ----------
    X_train, y_train
        Training data to bind; later passed to ``AutoSklearnClassifier.fit``.

    Returns
    -------
    functools.partial
        Callable taking ``(seed, dataset_name)``.
    """
    # Imported locally because this file's import block is not in view here.
    import functools

    return functools.partial(_spawn_classifier, X_train, y_train)
63+
6164
6265if __name__ == '__main__' :
6366
@@ -66,6 +69,7 @@ def spawn_classifier(seed, dataset_name):
6669 sklearn .model_selection .train_test_split (X , y , random_state = 1 )
6770
6871 processes = []
72+ spawn_classifier = get_spawn_classifier (X_train , y_train )
6973 for i in range (4 ): # set this at roughly half of your cores
7074 p = multiprocessing .Process (target = spawn_classifier , args = (i , 'digits' ))
7175 p .start ()
0 commit comments