Skip to content

Commit 8bdcba1

Browse files
committed
FIXES #558 - use less memory for ensemble builder, allow setting memory limit
1 parent 66d8894 commit 8bdcba1

File tree

3 files changed

+34
-16
lines changed

3 files changed

+34
-16
lines changed

autosklearn/automl.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def __init__(self,
6666
initial_configurations_via_metalearning=25,
6767
ensemble_size=1,
6868
ensemble_nbest=1,
69+
ensemble_memory_limit=1000,
6970
seed=1,
7071
ml_memory_limit=3072,
7172
metadata_directory=None,
@@ -94,6 +95,7 @@ def __init__(self,
9495
initial_configurations_via_metalearning
9596
self._ensemble_size = ensemble_size
9697
self._ensemble_nbest = ensemble_nbest
98+
self._ensemble_memory_limit = ensemble_memory_limit
9799
self._seed = seed
98100
self._ml_memory_limit = ml_memory_limit
99101
self._data_memory_limit = None
@@ -635,18 +637,21 @@ def _get_ensemble_process(self, time_left_for_ensembles,
635637
else:
636638
self._ensemble_size = ensemble_size
637639

638-
return EnsembleBuilder(backend=self._backend,
639-
dataset_name=dataset_name,
640-
task_type=task,
641-
metric=metric,
642-
limit=time_left_for_ensembles,
643-
ensemble_size=ensemble_size,
644-
ensemble_nbest=ensemble_nbest,
645-
seed=self._seed,
646-
shared_mode=self._shared_mode,
647-
precision=precision,
648-
max_iterations=max_iterations,
649-
read_at_most=np.inf)
640+
return EnsembleBuilder(
641+
backend=self._backend,
642+
dataset_name=dataset_name,
643+
task_type=task,
644+
metric=metric,
645+
limit=time_left_for_ensembles,
646+
ensemble_size=ensemble_size,
647+
ensemble_nbest=ensemble_nbest,
648+
seed=self._seed,
649+
shared_mode=self._shared_mode,
650+
precision=precision,
651+
max_iterations=max_iterations,
652+
read_at_most=np.inf,
653+
memory_limit=self._ensemble_memory_limit,
654+
)
650655

651656
def _load_models(self):
652657
if self._shared_mode:

autosklearn/ensembles/ensemble_selection.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ def _fast(self, predictions, labels):
7474
if s == 0:
7575
weighted_ensemble_prediction = np.zeros(predictions[0].shape)
7676
else:
77-
ensemble_prediction = np.mean(np.array(ensemble), axis=0)
77+
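# Memory-efficient averaging: accumulate the predictions into one buffer instead of stacking them all into a single array with np.array(ensemble) before taking the mean.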
78+
ensemble_prediction = np.zeros(ensemble[0].shape)
79+
for pred in ensemble:
80+
ensemble_prediction += pred
81+
ensemble_prediction /= s
82+
7883
weighted_ensemble_prediction = (s / float(s + 1)) * \
7984
ensemble_prediction
8085
fant_ensemble_prediction = np.zeros(weighted_ensemble_prediction.shape)

autosklearn/estimators.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def __init__(self,
1414
initial_configurations_via_metalearning=25,
1515
ensemble_size=50,
1616
ensemble_nbest=50,
17+
ensemble_memory_limit=1024,
1718
seed=1,
1819
ml_memory_limit=3072,
1920
include_estimators=None,
@@ -63,6 +64,11 @@ def __init__(self,
6364
ensemble. Implements `Model Library Pruning` from `Getting the
6465
most out of ensemble selection`.
6566
67+
ensemble_memory_limit : int, optional (default=1024)
68+
Memory limit in MB for the ensemble building process.
69+
`auto-sklearn` will reduce the number of considered models
70+
(``ensemble_nbest``) if the memory limit is reached.
71+
6672
seed : int, optional (default=1)
6773
Used to seed SMAC. Will determine the output file names.
6874
@@ -157,16 +163,16 @@ def __init__(self,
157163
optimization/validation set, which would later on be used to build
158164
an ensemble.
159165
* ``'model'`` : do not save any model files
160-
166+
161167
smac_scenario_args : dict, optional (None)
162168
Additional arguments inserted into the scenario of SMAC. See the
163169
`SMAC documentation <https://automl.github.io/SMAC3/stable/options.html?highlight=scenario#scenario>`_
164170
for a list of available arguments.
165-
171+
166172
get_smac_object_callback : callable
167173
Callback function to create an object of class
168174
`smac.optimizer.smbo.SMBO <https://automl.github.io/SMAC3/stable/apidoc/smac.optimizer.smbo.html>`_.
169-
The function must accept the arguments ``scenario_dict``,
175+
The function must accept the arguments ``scenario_dict``,
170176
``instances``, ``num_params``, ``runhistory``, ``seed`` and ``ta``.
171177
This is an advanced feature. Use only if you are familiar with
172178
`SMAC <https://automl.github.io/SMAC3/stable/index.html>`_.
@@ -191,6 +197,7 @@ def __init__(self,
191197
self.initial_configurations_via_metalearning = initial_configurations_via_metalearning
192198
self.ensemble_size = ensemble_size
193199
self.ensemble_nbest = ensemble_nbest
200+
self.ensemble_memory_limit = ensemble_memory_limit
194201
self.seed = seed
195202
self.ml_memory_limit = ml_memory_limit
196203
self.include_estimators = include_estimators
@@ -236,6 +243,7 @@ def build_automl(self):
236243
self.initial_configurations_via_metalearning,
237244
ensemble_size=self.ensemble_size,
238245
ensemble_nbest=self.ensemble_nbest,
246+
ensemble_memory_limit=self.ensemble_memory_limit,
239247
seed=self.seed,
240248
ml_memory_limit=self.ml_memory_limit,
241249
include_estimators=self.include_estimators,

0 commit comments

Comments
 (0)