Commit 2c4eb9d

Merge pull request #408 from kakawhq/patch-1
Update example_parallel.py
2 parents 76c033b + 35b9264 · commit 2c4eb9d


example/example_parallel.py

Lines changed: 41 additions & 37 deletions
@@ -21,43 +21,46 @@
     pass


-def spawn_classifier(seed, dataset_name):
-    """Spawn a subprocess.
-
-    auto-sklearn does not take care of spawning worker processes. This
-    function, which is called several times in the main block, is a new
-    process which runs one instance of auto-sklearn.
-    """
-
-    # Use the initial configurations from meta-learning only in one out of
-    # the four processes spawned. This prevents auto-sklearn from evaluating
-    # the same configurations in four processes.
-    if seed == 0:
-        initial_configurations_via_metalearning = 25
-        smac_scenario_args = {}
-    else:
-        initial_configurations_via_metalearning = 0
-        smac_scenario_args = {'initial_incumbent': 'RANDOM'}
-
-    # Arguments which are different to other runs of auto-sklearn:
-    # 1. all classifiers write to the same output directory
-    # 2. shared_mode is set to True; this enables sharing of data between
-    #    models.
-    # 3. all instances of the AutoSklearnClassifier must have a different seed!
-    automl = AutoSklearnClassifier(
-        time_left_for_this_task=60,  # sec., how long should this seed fit process run
-        per_run_time_limit=15,  # sec., each model may only take this long before it's killed
-        ml_memory_limit=1024,  # MB, memory limit imposed on each call to a ML algorithm
-        shared_mode=True,  # tmp folder will be shared between seeds
-        tmp_folder=tmp_folder,
-        output_folder=output_folder,
-        delete_tmp_folder_after_terminate=False,
-        ensemble_size=0,  # ensembles will be built when all optimization runs are finished
-        initial_configurations_via_metalearning=initial_configurations_via_metalearning,
-        seed=seed,
-        smac_scenario_args=smac_scenario_args,
-    )
-    automl.fit(X_train, y_train, dataset_name=dataset_name)
+def get_spawn_classifier(X_train, y_train):
+    def spawn_classifier(seed, dataset_name):
+        """Spawn a subprocess.
+
+        auto-sklearn does not take care of spawning worker processes. This
+        function, which is called several times in the main block, is a new
+        process which runs one instance of auto-sklearn.
+        """
+
+        # Use the initial configurations from meta-learning only in one out of
+        # the four processes spawned. This prevents auto-sklearn from evaluating
+        # the same configurations in four processes.
+        if seed == 0:
+            initial_configurations_via_metalearning = 25
+            smac_scenario_args = {}
+        else:
+            initial_configurations_via_metalearning = 0
+            smac_scenario_args = {'initial_incumbent': 'RANDOM'}
+
+        # Arguments which are different to other runs of auto-sklearn:
+        # 1. all classifiers write to the same output directory
+        # 2. shared_mode is set to True; this enables sharing of data between
+        #    models.
+        # 3. all instances of the AutoSklearnClassifier must have a different seed!
+        automl = AutoSklearnClassifier(
+            time_left_for_this_task=60,  # sec., how long should this seed fit process run
+            per_run_time_limit=15,  # sec., each model may only take this long before it's killed
+            ml_memory_limit=1024,  # MB, memory limit imposed on each call to a ML algorithm
+            shared_mode=True,  # tmp folder will be shared between seeds
+            tmp_folder=tmp_folder,
+            output_folder=output_folder,
+            delete_tmp_folder_after_terminate=False,
+            ensemble_size=0,  # ensembles will be built when all optimization runs are finished
+            initial_configurations_via_metalearning=initial_configurations_via_metalearning,
+            seed=seed,
+            smac_scenario_args=smac_scenario_args,
+        )
+        automl.fit(X_train, y_train, dataset_name=dataset_name)
+    return spawn_classifier
+

 if __name__ == '__main__':

@@ -66,6 +69,7 @@ def spawn_classifier(seed, dataset_name):
         sklearn.model_selection.train_test_split(X, y, random_state=1)

     processes = []
+    spawn_classifier = get_spawn_classifier(X_train, y_train)
     for i in range(4):  # set this at roughly half of your cores
         p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits'))
         p.start()
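
Why the diff is shaped this way: the old spawn_classifier read X_train and y_train from module-level names that are only assigned inside the if __name__ == '__main__': block, so the worker depended on globals being in place. Wrapping it in the get_spawn_classifier factory binds the training data explicitly in a closure instead. Below is a minimal sketch of the same factory-plus-closure pattern with multiprocessing, independent of auto-sklearn; make_worker and worker are illustrative names, not part of any library.

import multiprocessing


def make_worker(data):
    # Factory in the spirit of get_spawn_classifier above: the data is
    # passed in once and captured by the returned closure, rather than
    # read from a module-level global.
    def worker(seed):
        # Each process gets the bound data plus its own per-run argument.
        print('worker %d: saw %d rows' % (seed, len(data)))
    return worker


if __name__ == '__main__':
    data = list(range(1000))      # defined only inside the main block
    worker = make_worker(data)    # bind the data explicitly
    processes = []
    for i in range(4):
        # A closure target works with the 'fork' start method (the
        # default on Linux); under 'spawn' the target must be picklable,
        # and closures are not, so a top-level callable would be needed.
        p = multiprocessing.Process(target=worker, args=(i,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

One caveat worth keeping in mind: this pattern, like the example itself, assumes the child processes inherit the parent's memory via fork. On platforms that default to the 'spawn' start method, the data would instead have to be passed through picklable arguments.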
