Skip to content

Commit 536a16c

Browse files
Ensemble selection mem improvements (#882)
* Reducing ensemble selection RAM requirements * Flake fix * Fix fant ensemble
1 parent 003836d commit 536a16c

File tree

1 file changed

+32
-51
lines changed

1 file changed

+32
-51
lines changed

autosklearn/ensembles/ensemble_selection.py

Lines changed: 32 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,13 @@ def __init__(
1515
ensemble_size: int,
1616
task_type: int,
1717
metric: Scorer,
18-
sorted_initialization: bool = False,
1918
bagging: bool = False,
2019
mode: str = 'fast',
2120
random_state: np.random.RandomState = None,
2221
):
2322
self.ensemble_size = ensemble_size
2423
self.task_type = task_type
2524
self.metric = metric
26-
self.sorted_initialization = sorted_initialization
2725
self.bagging = bagging
2826
self.mode = mode
2927
self.random_state = random_state
@@ -64,38 +62,47 @@ def _fast(self, predictions, labels):
6462

6563
ensemble_size = self.ensemble_size
6664

67-
if self.sorted_initialization:
68-
n_best = 20
69-
indices = self._sorted_initialization(predictions, labels, n_best)
70-
for idx in indices:
71-
ensemble.append(predictions[idx])
72-
order.append(idx)
73-
ensemble_ = np.array(ensemble).mean(axis=0)
74-
ensemble_performance = calculate_score(
75-
labels, ensemble_, self.task_type, self.metric,
76-
ensemble_.shape[1])
77-
trajectory.append(ensemble_performance)
78-
ensemble_size -= n_best
79-
65+
weighted_ensemble_prediction = np.zeros(predictions[0].shape)
66+
fant_ensemble_prediction = np.zeros(weighted_ensemble_prediction.shape)
8067
for i in range(ensemble_size):
8168
scores = np.zeros((len(predictions)))
8269
s = len(ensemble)
8370
if s == 0:
84-
weighted_ensemble_prediction = np.zeros(predictions[0].shape)
71+
weighted_ensemble_prediction.fill(0.0)
8572
else:
86-
# Memory-efficient averaging!
87-
ensemble_prediction = np.zeros(ensemble[0].shape)
73+
weighted_ensemble_prediction.fill(0.0)
8874
for pred in ensemble:
89-
ensemble_prediction += pred
90-
ensemble_prediction /= s
91-
92-
weighted_ensemble_prediction = (s / float(s + 1)) * ensemble_prediction
93-
fant_ensemble_prediction = np.zeros(weighted_ensemble_prediction.shape)
75+
np.add(
76+
weighted_ensemble_prediction,
77+
pred,
78+
out=weighted_ensemble_prediction,
79+
)
80+
np.multiply(
81+
weighted_ensemble_prediction,
82+
1/s,
83+
out=weighted_ensemble_prediction,
84+
)
85+
np.multiply(
86+
weighted_ensemble_prediction,
87+
(s / float(s + 1)),
88+
out=weighted_ensemble_prediction,
89+
)
90+
91+
# Memory-efficient averaging!
9492
for j, pred in enumerate(predictions):
9593
# TODO: this could potentially be vectorized! - let's profile
9694
# the script first!
97-
fant_ensemble_prediction[:, :] = \
98-
weighted_ensemble_prediction + (1. / float(s + 1)) * pred
95+
fant_ensemble_prediction.fill(0.0)
96+
np.add(
97+
fant_ensemble_prediction,
98+
weighted_ensemble_prediction,
99+
out=fant_ensemble_prediction
100+
)
101+
np.add(
102+
fant_ensemble_prediction,
103+
(1. / float(s + 1)) * pred,
104+
out=fant_ensemble_prediction
105+
)
99106
scores[j] = self.metric._optimum - calculate_score(
100107
solution=labels,
101108
prediction=fant_ensemble_prediction,
@@ -127,22 +134,6 @@ def _slow(self, predictions, labels):
127134

128135
ensemble_size = self.ensemble_size
129136

130-
if self.sorted_initialization:
131-
n_best = 20
132-
indices = self._sorted_initialization(predictions, labels, n_best)
133-
for idx in indices:
134-
ensemble.append(predictions[idx])
135-
order.append(idx)
136-
ensemble_ = np.array(ensemble).mean(axis=0)
137-
ensemble_performance = calculate_score(
138-
solution=labels,
139-
prediction=ensemble_,
140-
task_type=self.task_type,
141-
metric=self.metric,
142-
all_scoring_functions=False)
143-
trajectory.append(ensemble_performance)
144-
ensemble_size -= n_best
145-
146137
for i in range(ensemble_size):
147138
scores = np.zeros([predictions.shape[0]])
148139
for j, pred in enumerate(predictions):
@@ -180,16 +171,6 @@ def _calculate_weights(self):
180171

181172
self.weights_ = weights
182173

183-
def _sorted_initialization(self, predictions, labels, n_best):
184-
perf = np.zeros([predictions.shape[0]])
185-
186-
for idx, prediction in enumerate(predictions):
187-
perf[idx] = calculate_score(labels, prediction, self.task_type,
188-
self.metric, predictions.shape[1])
189-
190-
indices = np.argsort(perf)[perf.shape[0] - n_best:]
191-
return indices
192-
193174
def _bagging(self, predictions, labels, fraction=0.5, n_bags=20):
194175
"""Rich Caruana's ensemble selection method with bagging."""
195176
raise ValueError('Bagging might not work with class-based interface!')

0 commit comments

Comments
 (0)