Commit 2c4eb9d

Merge pull request #408 from kakawhq/patch-1
Update example_parallel.py
2 parents 76c033b + 35b9264 · commit 2c4eb9d


example/example_parallel.py

Lines changed: 41 additions & 37 deletions
@@ -21,43 +21,46 @@
     pass


-def spawn_classifier(seed, dataset_name):
-    """Spawn a subprocess.
-
-    auto-sklearn does not take care of spawning worker processes. This
-    function, which is called several times in the main block, is a new
-    process which runs one instance of auto-sklearn.
-    """
-
-    # Use the initial configurations from meta-learning only in one out of
-    # the four processes spawned. This prevents auto-sklearn from evaluating
-    # the same configurations in four processes.
-    if seed == 0:
-        initial_configurations_via_metalearning = 25
-        smac_scenario_args = {}
-    else:
-        initial_configurations_via_metalearning = 0
-        smac_scenario_args = {'initial_incumbent': 'RANDOM'}
-
-    # Arguments which are different to other runs of auto-sklearn:
-    # 1. all classifiers write to the same output directory
-    # 2. shared_mode is set to True; this enables sharing of data between
-    #    models.
-    # 3. all instances of the AutoSklearnClassifier must have a different seed!
-    automl = AutoSklearnClassifier(
-        time_left_for_this_task=60,  # sec., how long should this seed fit process run
-        per_run_time_limit=15,  # sec., each model may only take this long before it's killed
-        ml_memory_limit=1024,  # MB, memory limit imposed on each call to a ML algorithm
-        shared_mode=True,  # tmp folder will be shared between seeds
-        tmp_folder=tmp_folder,
-        output_folder=output_folder,
-        delete_tmp_folder_after_terminate=False,
-        ensemble_size=0,  # ensembles will be built when all optimization runs are finished
-        initial_configurations_via_metalearning=initial_configurations_via_metalearning,
-        seed=seed,
-        smac_scenario_args=smac_scenario_args,
-    )
-    automl.fit(X_train, y_train, dataset_name=dataset_name)
+def get_spawn_classifier(X_train, y_train):
+    def spawn_classifier(seed, dataset_name):
+        """Spawn a subprocess.
+
+        auto-sklearn does not take care of spawning worker processes. This
+        function, which is called several times in the main block, is a new
+        process which runs one instance of auto-sklearn.
+        """
+
+        # Use the initial configurations from meta-learning only in one out of
+        # the four processes spawned. This prevents auto-sklearn from evaluating
+        # the same configurations in four processes.
+        if seed == 0:
+            initial_configurations_via_metalearning = 25
+            smac_scenario_args = {}
+        else:
+            initial_configurations_via_metalearning = 0
+            smac_scenario_args = {'initial_incumbent': 'RANDOM'}
+
+        # Arguments which are different to other runs of auto-sklearn:
+        # 1. all classifiers write to the same output directory
+        # 2. shared_mode is set to True; this enables sharing of data between
+        #    models.
+        # 3. all instances of the AutoSklearnClassifier must have a different seed!
+        automl = AutoSklearnClassifier(
+            time_left_for_this_task=60,  # sec., how long should this seed fit process run
+            per_run_time_limit=15,  # sec., each model may only take this long before it's killed
+            ml_memory_limit=1024,  # MB, memory limit imposed on each call to a ML algorithm
+            shared_mode=True,  # tmp folder will be shared between seeds
+            tmp_folder=tmp_folder,
+            output_folder=output_folder,
+            delete_tmp_folder_after_terminate=False,
+            ensemble_size=0,  # ensembles will be built when all optimization runs are finished
+            initial_configurations_via_metalearning=initial_configurations_via_metalearning,
+            seed=seed,
+            smac_scenario_args=smac_scenario_args,
+        )
+        automl.fit(X_train, y_train, dataset_name=dataset_name)
+    return spawn_classifier
+

 if __name__ == '__main__':

@@ -66,6 +69,7 @@ def spawn_classifier(seed, dataset_name):
         sklearn.model_selection.train_test_split(X, y, random_state=1)

     processes = []
+    spawn_classifier = get_spawn_classifier(X_train, y_train)
     for i in range(4):  # set this at roughly half of your cores
         p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits'))
         p.start()
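
Why the diff is shaped this way: the old spawn_classifier read X_train and y_train from module-level names that are only assigned inside the if __name__ == '__main__': block, so the worker depended on globals being in place. Wrapping it in the get_spawn_classifier factory binds the training data explicitly in a closure instead. Below is a minimal sketch of the same factory-plus-closure pattern with multiprocessing, independent of auto-sklearn; make_worker and worker are illustrative names, not part of any library.

import multiprocessing


def make_worker(data):
    # Factory in the spirit of get_spawn_classifier above: the data is
    # passed in once and captured by the returned closure, rather than
    # read from a module-level global.
    def worker(seed):
        # Each process gets the bound data plus its own per-run argument.
        print('worker %d: saw %d rows' % (seed, len(data)))
    return worker


if __name__ == '__main__':
    data = list(range(1000))      # defined only inside the main block
    worker = make_worker(data)    # bind the data explicitly
    processes = []
    for i in range(4):
        # A closure target works with the 'fork' start method (the
        # default on Linux); under 'spawn' the target must be picklable,
        # and closures are not, so a top-level callable would be needed.
        p = multiprocessing.Process(target=worker, args=(i,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

One caveat worth keeping in mind: this pattern, like the example itself, assumes the child processes inherit the parent's memory via fork. On platforms that default to the 'spawn' start method, the data would instead have to be passed through picklable arguments.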
