Commit 9343373

FIX_1061 (#1063)
* FIX_1061
* Fix type of target
* Moving to classes_
* classes_ should be np.ndarray
* Force float before nan
1 parent 425fbac commit 9343373

File tree

5 files changed (+88, −13 lines)


autosklearn/data/target_validator.py

Lines changed: 17 additions & 0 deletions
@@ -386,3 +386,20 @@ def _check_data(
                 self.type_of_target,
                 supported_output_types
             ))
+
+    @property
+    def classes_(self) -> np.ndarray:
+        """
+        Complies with the scikit-learn classes_ attribute,
+        which consists of an ndarray of shape (n_classes,)
+        where n_classes is the number of classes seen while fitting
+        an encoder to the targets.
+        Returns
+        -------
+        classes_: np.ndarray
+            The unique classes seen during encoding of a classifier
+        """
+        if self.encoder is None:
+            return np.array([])
+        else:
+            return self.encoder.categories_[0]
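
The new property simply exposes the categories learned by the validator's fitted encoder. As a rough illustration of what it returns — a sketch using scikit-learn's OrdinalEncoder directly (which is what the categories_[0] access suggests), not the auto-sklearn validator itself:

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

# Fitting an encoder on the targets records the unique classes in categories_
encoder = OrdinalEncoder()
encoder.fit(np.array(['a', 'a', 'b', 'c', 'a']).reshape(-1, 1))

print(encoder.categories_[0])  # array(['a', 'b', 'c'], dtype='<U1')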

autosklearn/estimators.py

Lines changed: 18 additions & 12 deletions
@@ -5,7 +5,7 @@
 import dask.distributed
 import joblib
 import numpy as np
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
 from sklearn.utils.multiclass import type_of_target
 
 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor, AutoML
@@ -219,11 +219,11 @@ def __init__(
         :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
         Metrics`_.
         If None is provided, a default metric is selected depending on the task.
-
+
     scoring_functions : List[Scorer], optional (None)
-        List of scorers which will be calculated for each pipeline and results will be
+        List of scorers which will be calculated for each pipeline and results will be
         available via ``cv_results``
-
+
     load_models : bool, optional (True)
         Whether to load the models after fitting Auto-sklearn.
 
@@ -266,13 +266,14 @@ def __init__(
         self.smac_scenario_args = smac_scenario_args
         self.logging_config = logging_config
         self.metadata_directory = metadata_directory
-        self._metric = metric
-        self._scoring_functions = scoring_functions
-        self._load_models = load_models
+        self.metric = metric
+        self.scoring_functions = scoring_functions
+        self.load_models = load_models
 
         self.automl_ = None # type: Optional[AutoML]
         # n_jobs after conversion to a number (b/c default is None)
         self._n_jobs = None
+
         super().__init__()
 
     def __getstate__(self):
@@ -323,8 +324,8 @@ def build_automl(
             smac_scenario_args=smac_scenario_args,
             logging_config=self.logging_config,
             metadata_directory=self.metadata_directory,
-            metric=self._metric,
-            scoring_functions=self._scoring_functions
+            metric=self.metric,
+            scoring_functions=self.scoring_functions
         )
 
         return automl
@@ -353,7 +354,7 @@ def fit(self, **kwargs):
             tmp_folder=self.tmp_folder,
             output_folder=self.output_folder,
         )
-        self.automl_.fit(load_models=self._load_models, **kwargs)
+        self.automl_.fit(load_models=self.load_models, **kwargs)
 
         return self
 
@@ -516,7 +517,7 @@ def get_configuration_space(self, X, y):
         return self.automl_.configuration_space
 
 
-class AutoSklearnClassifier(AutoSklearnEstimator):
+class AutoSklearnClassifier(AutoSklearnEstimator, ClassifierMixin):
     """
     This class implements the classification task.
 
@@ -597,6 +598,11 @@ def fit(self, X, y,
             dataset_name=dataset_name,
         )
 
+        # After fit, a classifier is expected to define classes_:
+        # a list of class labels known to the classifier, mapping each label
+        # to a numerical index used in the model representation of our output.
+        self.classes_ = self.automl_.InputValidator.target_validator.classes_
+
         return self
 
     def predict(self, X, batch_size=None, n_jobs=1):
@@ -656,7 +662,7 @@ def _get_automl_class(self):
         return AutoMLClassifier
 
 
-class AutoSklearnRegressor(AutoSklearnEstimator):
+class AutoSklearnRegressor(AutoSklearnEstimator, RegressorMixin):
     """
     This class implements the regression task.
 
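
The rename from self._metric, self._scoring_functions and self._load_models to their public names follows scikit-learn's estimator contract: BaseEstimator.get_params() reads attributes named exactly like the __init__ parameters, so clone() and the parameter-immutability check in the tests below only work if constructor arguments are stored verbatim under those names. A minimal sketch with a hypothetical ToyEstimator (not part of auto-sklearn):

from sklearn.base import BaseEstimator, clone

class ToyEstimator(BaseEstimator):
    def __init__(self, metric=None, load_models=True):
        # Constructor arguments are stored unmodified, under the same names,
        # so get_params()/set_params()/clone() can round-trip them.
        self.metric = metric
        self.load_models = load_models

est = ToyEstimator(metric='accuracy')
print(clone(est).get_params())  # {'load_models': True, 'metric': 'accuracy'}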

test/test_automl/test_estimators.py

Lines changed: 46 additions & 0 deletions
@@ -1,5 +1,7 @@
+import copy
 import glob
 import os
+import inspect
 import pickle
 import re
 import sys
@@ -15,6 +17,10 @@
 import sklearn
 import sklearn.dummy
 import sklearn.datasets
+from sklearn.base import clone
+from sklearn.base import ClassifierMixin, RegressorMixin
+from sklearn.base import is_classifier
+
 
 from autosklearn.data.validation import InputValidator
 import autosklearn.pipeline.util as putil
@@ -160,6 +166,10 @@ def test_type_of_target(mock_estimator):
     ])
 
     cls = AutoSklearnClassifier(ensemble_size=0)
+    cls.automl_ = unittest.mock.Mock()
+    cls.automl_.InputValidator = unittest.mock.Mock()
+    cls.automl_.InputValidator.target_validator = unittest.mock.Mock()
+
     # Illegal target types for classification: continuous,
     # multiclass-multioutput, continuous-multioutput.
     expected_msg = r".*Classification with data of type"
@@ -253,6 +263,10 @@ def test_cv_results(tmp_dir, output_dir):
                                 ensemble_size=0,
                                 scoring_functions=[autosklearn.metrics.precision,
                                                    autosklearn.metrics.roc_auc])
+
+    params = cls.get_params()
+    original_params = copy.deepcopy(params)
+
     cls.fit(X_train, Y_train)
     cv_results = cls.cv_results_
     assert isinstance(cv_results, dict), type(cv_results)
@@ -275,6 +289,27 @@ def test_cv_results(tmp_dir, output_dir):
                        cv_results.items() if key.startswith('param_')]
     assert all(cv_result_items), cv_results.items()
 
+    # Compare the state of the model parameters with the original parameters
+    new_params = clone(cls).get_params()
+    for param_name, original_value in original_params.items():
+        new_value = new_params[param_name]
+
+        # Taken from Sklearn code:
+        # We should never change or mutate the internal state of input
+        # parameters by default. To check this we use the joblib.hash function
+        # that introspects recursively any subobjects to compute a checksum.
+        # The only exception to this rule of immutable constructor parameters
+        # is possible RandomState instance but in this check we explicitly
+        # fixed the random_state params recursively to be integer seeds.
+        assert joblib.hash(new_value) == joblib.hash(original_value), (
+            "Estimator %s should not change or mutate "
+            " the parameter %s from %s to %s during fit."
+            % (cls, param_name, original_value, new_value))
+
+    # Comply with https://scikit-learn.org/dev/glossary.html#term-classes
+    is_classifier(cls)
+    assert hasattr(cls, 'classes_')
+
 
 @unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.build_automl')
 def test_fit_n_jobs_negative(build_automl_patch):
@@ -614,3 +649,14 @@ def test_autosklearn2_classification_methods_returns_self(dask_client):
     ) >= 2 / 3, print_debug_information(automl)
 
     pickle.dumps(automl_fitted)
+
+
+@pytest.mark.parametrize("class_", [AutoSklearnClassifier, AutoSklearnRegressor,
+                                    AutoSklearn2Classifier])
+def test_check_estimator_signature(class_):
+    # Make sure the signature is stored in self
+    expected_subclass = ClassifierMixin if 'Classifier' in str(class_) else RegressorMixin
+    assert issubclass(class_, expected_subclass)
+    estimator = class_()
+    for expected in list(inspect.signature(class_).parameters):
+        assert hasattr(estimator, expected)
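
The new assertions rely on the mixins added in estimators.py: ClassifierMixin sets _estimator_type to "classifier", which is what sklearn.base.is_classifier inspects, and RegressorMixin does the same for regressors. A short sketch of the behaviour being tested (constructor arguments as used in the tests above; only a sketch, not an exhaustive check):

from sklearn.base import is_classifier, is_regressor
from autosklearn.estimators import AutoSklearnClassifier, AutoSklearnRegressor

# The mixins set _estimator_type, which these helpers check
print(is_classifier(AutoSklearnClassifier(ensemble_size=0)))  # True
print(is_regressor(AutoSklearnRegressor(ensemble_size=0)))    # True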

test/test_data/test_feature_validator.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def input_data_featuretest(request):
             ['a', 'b', 'd', 'r', 'b', 'c'],
         ])
     elif request.param == 'numpy_categoricalonly_nan':
-        array = np.random.randint(10, size=(100, 10))
+        array = np.random.randint(10, size=(100, 10)).astype('float')
        array[50, 0:5] = np.nan
        return array
    elif request.param == 'numpy_numericalonly_nan':
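
This is the "Force float before nan" part of the commit message: a NumPy array with an integer dtype cannot hold NaN, so the fixture must cast to float before injecting missing values. A minimal reproduction of the underlying NumPy behaviour:

import numpy as np

arr = np.random.randint(10, size=(3, 3))  # integer dtype: cannot represent NaN
# arr[0, 0] = np.nan                      # would raise ValueError: cannot convert float NaN to integer

arr = arr.astype('float')                 # cast first, as the fixture now does
arr[0, 0] = np.nan                        # fine once the dtype is float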

test/test_data/test_target_validator.py

Lines changed: 6 additions & 0 deletions
@@ -450,6 +450,8 @@ def test_targetvalidator_inversetransform():
     y_decoded = validator.inverse_transform(y)
     assert ['a', 'a', 'b', 'c', 'a'] == y_decoded.tolist()
 
+    assert validator.classes_.tolist() == ['a', 'b', 'c']
+
     validator = TargetValidator(is_classification=True)
     multi_label = pd.DataFrame(
         np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]]),
@@ -461,6 +463,10 @@ def test_targetvalidator_inversetransform():
     y_decoded = validator.inverse_transform(y)
     np.testing.assert_array_almost_equal(y, y_decoded)
 
+    # Multilabel classification is not encoded
+    # For this reason, classes_ attribute does not contain a class
+    np.testing.assert_array_almost_equal(validator.classes_, np.array([]))
+
 
 # Actual checks for the targets
 @pytest.mark.parametrize(
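
The two assertions capture the intended contract: single-output classification targets are encoded, so classes_ lists the labels, while multilabel indicator targets are passed through unencoded and classes_ stays empty. A rough sketch of how the two target shapes are distinguished, using sklearn's type_of_target (the same helper estimators.py imports); the trailing comments describe the behaviour asserted in this test:

import numpy as np
from sklearn.utils.multiclass import type_of_target

print(type_of_target(['a', 'a', 'b', 'c', 'a']))               # 'multiclass' -> encoded, classes_ == ['a', 'b', 'c']
print(type_of_target(np.array([[1, 0, 0, 1], [0, 0, 1, 1]])))  # 'multilabel-indicator' -> not encoded, classes_ empty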

0 commit comments
