
Commit b53c7e1

ahn1340 authored and mfeurer committed
Fix classifier bug (#585)
* .
* .
* AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self.
* Initial commit. Work in Progress.
* Fix minor printing error in sprint_statistics.
* Revert "Fix#460"
* Raise error if ensemble is not built (#480)
* .
* .
* AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self.
* Initial commit. Work in Progress.
* Fix minor printing error in sprint_statistics.
* Revert "Fix#460"
* Resolve rebase conflict
* combined unittests to reduce travis runtime
* .
* .
* .
* .
* .
* Check target type at the beginning of the fitting process.
* .
* Fixed minor error in unittest
* .
* Add unittest for target type checking.
* .
* .
* [Debug] try with numpy version 1.14.5
* [Debug] Check if numpy version 1.14.6 raises error.
* Check target type at the beginning of the fitting process.
* .
* Fixed minor error in unittest
* .
* Add unittest for target type checking.
* .
* .
* [Debug] Check if numpy version 1.14.6 raises error.
* Fix numpy version to 1.14.5
* Add comment to Mock in test_type_of_target
* Fix line length in example_parallel.py
* Fix minor error
* FIX classifier returning prediction larger than 1
* Remove comments
* ADD unittest for ensemble_selection.predict()
* minor FIX
* ADD assertion in predict_proba to check probabilities sum up to 1.
* REVERT changes in autosklearn/ensemble_builder.py
* simplify ensemble prediction method
* Modify assertion statement
* ADD case check in ensemble_selection.predict()
* Fix minor error in pred_probs verification.
* Modify unittest for ensemble_selection.predict()
* FIX flake8 errors
* FIX flake8 error
* ADD Ignore assertion for multilabel, check probabilities lie between [0, 1].
* Debug flake8 error
1 parent ff2a91c commit b53c7e1

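For context on the headline fix in this log ("FIX classifier returning prediction larger than 1"): the pre-fix EnsembleSelection.predict (see the removed lines in the first diff below) paired enumerate indices over the non-zero weights with prediction rows, so whenever the passed predictions also contained zero-weight models, weights landed on the wrong rows and the remaining rows were summed unweighted. A minimal sketch of that failure mode, using illustrative weights and probabilities that are not taken from the repository:

    import numpy as np

    # Per-model class probabilities: 3 models, 1 sample, 2 classes.
    predictions = np.array([[[0.6, 0.4]],
                            [[0.5, 0.5]],
                            [[0.7, 0.3]]])
    weights = [0.7, 0.0, 0.3]  # the second model has zero weight

    # Pre-fix logic (copied from the removed lines of ensemble_selection.py):
    # the two non-zero weights land on rows 0 and 1, and row 2 keeps an
    # implicit weight of 1.0 in the final sum.
    old = predictions.copy()
    for i, weight in enumerate(w for w in weights if w > 0):
        old[i] *= weight
    print(np.sum(old, axis=0))  # roughly [[1.27 0.73]], i.e. larger than 1

    # Post-fix logic: a weighted average over all models stays in [0, 1].
    print(np.average(predictions, axis=0, weights=weights))  # [[0.63 0.37]]
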
File tree

3 files changed: +106 −8 lines


autosklearn/ensembles/ensemble_selection.py

Lines changed: 17 additions & 4 deletions
@@ -209,10 +209,23 @@ def _bagging(self, predictions, labels, fraction=0.5, n_bags=20):
         return np.array(order_of_each_bag)
 
     def predict(self, predictions):
-        non_null_weights = (weight for weight in self.weights_ if weight > 0)
-        for i, weight in enumerate(non_null_weights):
-            predictions[i] *= weight
-        return np.sum(predictions, axis=0)
+        predictions = np.asarray(predictions)
+
+        # If predictions.shape[0] == len(self.weights_), the predictions
+        # include those of zero-weight models.
+        if predictions.shape[0] == len(self.weights_):
+            return np.average(predictions, axis=0, weights=self.weights_)
+
+        # If predictions.shape[0] == np.count_nonzero(self.weights_), the
+        # predictions do not include those of zero-weight models.
+        elif predictions.shape[0] == np.count_nonzero(self.weights_):
+            non_null_weights = [w for w in self.weights_ if w > 0]
+            return np.average(predictions, axis=0, weights=non_null_weights)
+
+        # If none of the above applies, then something must have gone wrong.
+        else:
+            raise ValueError("The dimensions of ensemble predictions"
+                             " and ensemble weights do not match!")
 
     def __str__(self):
         return 'Ensemble Selection:\n\tTrajectory: %s\n\tMembers: %s' \

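The rewritten predict accepts either the full stack of per-model predictions (matched against self.weights_) or only those of the non-zero-weight models. A short sketch of the two accepted shapes, assuming numpy and made-up weights; np.random.dirichlet is used here only to generate valid probability rows:

    import numpy as np

    weights = [0.7, 0.2, 0.0, 0.1]  # one zero-weight model
    # (n_models, n_samples, n_classes) probabilities, each row summing to 1
    preds_all = np.random.dirichlet([1, 1], size=(4, 2))
    preds_nonzero = preds_all[[0, 1, 3]]  # drop the zero-weight model

    # Case 1: predictions.shape[0] == len(weights)
    full = np.average(preds_all, axis=0, weights=weights)

    # Case 2: predictions.shape[0] == np.count_nonzero(weights)
    reduced = np.average(preds_nonzero, axis=0,
                         weights=[w for w in weights if w > 0])

    # The zero weight makes both paths equivalent, and rows still sum to 1.
    assert np.allclose(full, reduced)
    assert np.allclose(full.sum(axis=1), 1.0)
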
autosklearn/estimators.py

Lines changed: 21 additions & 1 deletion
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 from sklearn.base import BaseEstimator
+import numpy as np
 
 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor
 from autosklearn.util.backend import create

@@ -486,6 +487,9 @@ def fit(self, X, y,
             raise ValueError("classification with data of type %s is"
                              " not supported" % target_type)
 
+        # Remember the target type for use in predict_proba later.
+        self.target_type = target_type
+
         super().fit(
             X=X,
             y=y,

@@ -527,9 +531,25 @@ def predict_proba(self, X, batch_size=None, n_jobs=1):
             The predicted class probabilities.
 
         """
-        return super().predict_proba(
+        pred_proba = super().predict_proba(
             X, batch_size=batch_size, n_jobs=n_jobs)
 
+        # Check that all probabilities sum up to 1.
+        # Assert only if the target type is not multilabel-indicator.
+        if self.target_type not in ['multilabel-indicator']:
+            assert(
+                np.allclose(
+                    np.sum(pred_proba, axis=1),
+                    np.ones_like(pred_proba[:, 0]))
+            ), "prediction probability does not sum up to 1!"
+
+        # Check that all probability values lie between 0 and 1.
+        assert(
+            (pred_proba >= 0).all() and (pred_proba <= 1).all()
+        ), "found prediction probability value outside of [0, 1]!"
+
+        return pred_proba
+
     def _get_automl_class(self):
         return AutoMLClassifier
 

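The two assertions added to predict_proba amount to a generic sanity check on class-probability output. For illustration, here is the same check pulled out into a standalone helper (the name check_pred_proba is hypothetical and not part of auto-sklearn), skipping the row-sum test for multilabel targets exactly as the patched method does:

    import numpy as np

    def check_pred_proba(pred_proba, target_type):
        # Hypothetical helper mirroring the checks added in predict_proba.
        pred_proba = np.asarray(pred_proba)
        # Every value must be a valid probability.
        assert ((pred_proba >= 0).all() and (pred_proba <= 1).all()), \
            "found prediction probability value outside of [0, 1]!"
        # For single-label targets, each row must sum to 1.
        if target_type != 'multilabel-indicator':
            assert np.allclose(pred_proba.sum(axis=1), 1.0), \
                "prediction probability does not sum up to 1!"
        return pred_proba

    check_pred_proba([[0.2, 0.8], [0.5, 0.5]], target_type='multiclass')  # passes
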
test/test_ensemble_builder/test_ensemble.py

Lines changed: 68 additions & 3 deletions
@@ -6,14 +6,14 @@
 import unittest
 import unittest.mock
 
+from autosklearn.metrics import roc_auc, accuracy
+from autosklearn.ensembles.ensemble_selection import EnsembleSelection
+from autosklearn.ensemble_builder import EnsembleBuilder, Y_VALID, Y_TEST
 import numpy as np
 
 this_directory = os.path.dirname(__file__)
 sys.path.append(this_directory)
 
-from autosklearn.ensemble_builder import EnsembleBuilder, Y_ENSEMBLE, Y_VALID, Y_TEST
-from autosklearn.metrics import roc_auc
-
 
 class BackendMock(object):
 

@@ -260,3 +260,68 @@ def testLimit(self):
 
         # it should try to reduce ensemble_nbest until it also failed at 2
         self.assertEqual(ensbuilder.ensemble_nbest,1)
+
+
+class EnsembleSelectionTest(unittest.TestCase):
+    def testPredict(self):
+        # Test that ensemble prediction applies weights correctly to given
+        # predictions. There are two possible cases:
+        # 1) predictions.shape[0] == len(self.weights_). In this case,
+        # predictions include those made by zero-weighted models. Therefore,
+        # we simply apply each weight to the corresponding model predictions.
+        # 2) predictions.shape[0] < len(self.weights_). In this case,
+        # predictions exclude those made by zero-weighted models. Therefore,
+        # we first exclude all occurrences of zero in self.weights_, and then
+        # apply the weights.
+        # If none of the above is the case, predict() raises an error.
+        ensemble = EnsembleSelection(ensemble_size=3,
+                                     task_type=1,
+                                     metric=accuracy,
+                                     )
+        # Test for case 1. Create (3, 2, 2) predictions.
+        per_model_pred = np.array([
+            [[0.9, 0.1],
+             [0.4, 0.6]],
+            [[0.8, 0.2],
+             [0.3, 0.7]],
+            [[1.0, 0.0],
+             [0.1, 0.9]]
+        ])
+        # Weights of 3 hypothetical models
+        ensemble.weights_ = [0.7, 0.2, 0.1]
+        pred = ensemble.predict(per_model_pred)
+        truth = np.array([[0.89, 0.11],  # This should be the true prediction.
+                          [0.35, 0.65]])
+        self.assertTrue(np.allclose(pred, truth))
+
+        # Test for case 2.
+        per_model_pred = np.array([
+            [[0.9, 0.1],
+             [0.4, 0.6]],
+            [[0.8, 0.2],
+             [0.3, 0.7]],
+            [[1.0, 0.0],
+             [0.1, 0.9]]
+        ])
+        # The third model now has a weight of zero.
+        ensemble.weights_ = [0.7, 0.2, 0.0, 0.1]
+        pred = ensemble.predict(per_model_pred)
+        truth = np.array([[0.89, 0.11],
+                          [0.35, 0.65]])
+        self.assertTrue(np.allclose(pred, truth))
+
+        # Test for the error case.
+        per_model_pred = np.array([
+            [[0.9, 0.1],
+             [0.4, 0.6]],
+            [[0.8, 0.2],
+             [0.3, 0.7]],
+            [[1.0, 0.0],
+             [0.1, 0.9]]
+        ])
+        # Now the weights have 2 zero weights and 2 non-zero weights,
+        # which is incompatible.
+        ensemble.weights_ = [0.6, 0.0, 0.0, 0.4]
+
+        with self.assertRaises(ValueError):
+            ensemble.predict(per_model_pred)

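For reference, the truth arrays hard-coded in the new test follow directly from a weighted average of the per-model probabilities with weights [0.7, 0.2, 0.1]; a quick standalone check of the arithmetic:

    import numpy as np

    # First sample:  0.7*0.9 + 0.2*0.8 + 0.1*1.0 = 0.89 and
    #                0.7*0.1 + 0.2*0.2 + 0.1*0.0 = 0.11
    # Second sample: 0.7*0.4 + 0.2*0.3 + 0.1*0.1 = 0.35 and
    #                0.7*0.6 + 0.2*0.7 + 0.1*0.9 = 0.65
    per_model_pred = np.array([[[0.9, 0.1], [0.4, 0.6]],
                               [[0.8, 0.2], [0.3, 0.7]],
                               [[1.0, 0.0], [0.1, 0.9]]])
    print(np.average(per_model_pred, axis=0, weights=[0.7, 0.2, 0.1]))
    # -> [[0.89 0.11]
    #     [0.35 0.65]]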