Commit 75fed8a

Update more sklearn tests (#1175)
* n_iter is now keyword-only
* Standardize sklearn pipeline description lookups
* `priors` is no longer positional, and wasn't used in the first place
* Remove loss=kneighbours from the complex pipeline
1 parent e6250fa commit 75fed8a

File tree

1 file changed: +45 -105 lines changed


tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py

Lines changed: 45 additions & 105 deletions
@@ -5,6 +5,7 @@
 import re
 import os
 import sys
+from typing import Any
 import unittest
 from distutils.version import LooseVersion
 from collections import OrderedDict
@@ -73,6 +74,45 @@ def setUp(self):

         self.extension = SklearnExtension()

+    def _get_expected_pipeline_description(self, model: Any) -> str:
+        if version.parse(sklearn.__version__) >= version.parse("1.0"):
+            expected_fixture = (
+                "Pipeline of transforms with a final estimator.\n\nSequentially"
+                " apply a list of transforms and a final estimator.\n"
+                "Intermediate steps of the pipeline must be 'transforms', that "
+                "is, they\nmust implement `fit` and `transform` methods.\nThe final "
+                "estimator only needs to implement `fit`.\nThe transformers in "
+                "the pipeline can be cached using ``memory`` argument.\n\nThe "
+                "purpose of the pipeline is to assemble several steps that can "
+                "be\ncross-validated together while setting different parameters"
+                ". For this, it\nenables setting parameters of the various steps"
+                " using their names and the\nparameter name separated by a `'__'`,"
+                " as in the example below. A step's\nestimator may be replaced "
+                "entirely by setting the parameter with its name\nto another "
+                "estimator, or a transformer removed by setting it to\n"
+                "`'passthrough'` or `None`."
+            )
+        elif version.parse(sklearn.__version__) >= version.parse("0.21.0"):
+            expected_fixture = (
+                "Pipeline of transforms with a final estimator.\n\nSequentially"
+                " apply a list of transforms and a final estimator.\n"
+                "Intermediate steps of the pipeline must be 'transforms', that "
+                "is, they\nmust implement fit and transform methods.\nThe final "
+                "estimator only needs to implement fit.\nThe transformers in "
+                "the pipeline can be cached using ``memory`` argument.\n\nThe "
+                "purpose of the pipeline is to assemble several steps that can "
+                "be\ncross-validated together while setting different parameters"
+                ".\nFor this, it enables setting parameters of the various steps"
+                " using their\nnames and the parameter name separated by a '__',"
+                " as in the example below.\nA step's estimator may be replaced "
+                "entirely by setting the parameter\nwith its name to another "
+                "estimator, or a transformer removed by setting\nit to "
+                "'passthrough' or ``None``."
+            )
+        else:
+            expected_fixture = self.extension._get_sklearn_description(model)
+        return expected_fixture
+
     def _serialization_test_helper(
         self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2)
     ):
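The fixtures hard-coded in this helper mirror the opening paragraphs of `sklearn.pipeline.Pipeline`'s docstring as it reads on scikit-learn >= 1.0 and >= 0.21, respectively; anything older falls back to whatever `_get_sklearn_description` returns at runtime. A minimal sketch of how to inspect the live description the tests compare against (the pipeline built here is illustrative, not one of the test fixtures):

    import sklearn.dummy
    import sklearn.pipeline
    import sklearn.preprocessing
    from openml.extensions.sklearn import SklearnExtension

    extension = SklearnExtension()
    model = sklearn.pipeline.Pipeline(
        steps=[
            ("scaler", sklearn.preprocessing.StandardScaler()),
            ("dummy", sklearn.dummy.DummyClassifier()),
        ]
    )
    # The flow description OpenML derives from the estimator's docstring.
    print(extension._get_sklearn_description(model))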
@@ -398,44 +438,7 @@ def test_serialize_pipeline(self):
             "dummy=sklearn.dummy.DummyClassifier)".format(scaler_name)
         )
         fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)"
-
-        if version.parse(sklearn.__version__) >= version.parse("1.0"):
-            fixture_description = (
-                "Pipeline of transforms with a final estimator.\n\nSequentially"
-                " apply a list of transforms and a final estimator.\n"
-                "Intermediate steps of the pipeline must be 'transforms', that "
-                "is, they\nmust implement `fit` and `transform` methods.\nThe final "
-                "estimator only needs to implement `fit`.\nThe transformers in "
-                "the pipeline can be cached using ``memory`` argument.\n\nThe "
-                "purpose of the pipeline is to assemble several steps that can "
-                "be\ncross-validated together while setting different parameters"
-                ". For this, it\nenables setting parameters of the various steps"
-                " using their names and the\nparameter name separated by a `'__'`,"
-                " as in the example below. A step's\nestimator may be replaced "
-                "entirely by setting the parameter with its name\nto another "
-                "estimator, or a transformer removed by setting it to\n"
-                "`'passthrough'` or `None`."
-            )
-        elif version.parse(sklearn.__version__) >= version.parse("0.21.0"):
-            fixture_description = (
-                "Pipeline of transforms with a final estimator.\n\nSequentially"
-                " apply a list of transforms and a final estimator.\n"
-                "Intermediate steps of the pipeline must be 'transforms', that "
-                "is, they\nmust implement fit and transform methods.\nThe final "
-                "estimator only needs to implement fit.\nThe transformers in "
-                "the pipeline can be cached using ``memory`` argument.\n\nThe "
-                "purpose of the pipeline is to assemble several steps that can "
-                "be\ncross-validated together while setting different parameters"
-                ".\nFor this, it enables setting parameters of the various steps"
-                " using their\nnames and the parameter name separated by a '__',"
-                " as in the example below.\nA step's estimator may be replaced "
-                "entirely by setting the parameter\nwith its name to another "
-                "estimator, or a transformer removed by setting\nit to "
-                "'passthrough' or ``None``."
-            )
-        else:
-            fixture_description = self.extension._get_sklearn_description(model)
-
+        fixture_description = self._get_expected_pipeline_description(model)
         fixture_structure = {
             fixture_name: [],
             "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
@@ -505,43 +508,7 @@ def test_serialize_pipeline_clustering(self):
             "clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name)
         )
         fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)"
-
-        if version.parse(sklearn.__version__) >= version.parse("1.0"):
-            fixture_description = (
-                "Pipeline of transforms with a final estimator.\n\nSequentially"
-                " apply a list of transforms and a final estimator.\n"
-                "Intermediate steps of the pipeline must be 'transforms', that "
-                "is, they\nmust implement `fit` and `transform` methods.\nThe final "
-                "estimator only needs to implement `fit`.\nThe transformers in "
-                "the pipeline can be cached using ``memory`` argument.\n\nThe "
-                "purpose of the pipeline is to assemble several steps that can "
-                "be\ncross-validated together while setting different parameters"
-                ". For this, it\nenables setting parameters of the various steps"
-                " using their names and the\nparameter name separated by a `'__'`,"
-                " as in the example below. A step's\nestimator may be replaced "
-                "entirely by setting the parameter with its name\nto another "
-                "estimator, or a transformer removed by setting it to\n"
-                "`'passthrough'` or `None`."
-            )
-        elif version.parse(sklearn.__version__) >= version.parse("0.21.0"):
-            fixture_description = (
-                "Pipeline of transforms with a final estimator.\n\nSequentially"
-                " apply a list of transforms and a final estimator.\n"
-                "Intermediate steps of the pipeline must be 'transforms', that "
-                "is, they\nmust implement fit and transform methods.\nThe final "
-                "estimator only needs to implement fit.\nThe transformers in "
-                "the pipeline can be cached using ``memory`` argument.\n\nThe "
-                "purpose of the pipeline is to assemble several steps that can "
-                "be\ncross-validated together while setting different parameters"
-                ".\nFor this, it enables setting parameters of the various steps"
-                " using their\nnames and the parameter name separated by a '__',"
-                " as in the example below.\nA step's estimator may be replaced "
-                "entirely by setting the parameter\nwith its name to another "
-                "estimator, or a transformer removed by setting\nit to "
-                "'passthrough' or ``None``."
-            )
-        else:
-            fixture_description = self.extension._get_sklearn_description(model)
+        fixture_description = self._get_expected_pipeline_description(model)
         fixture_structure = {
             fixture_name: [],
             "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
@@ -699,27 +666,7 @@ def test_serialize_column_transformer_pipeline(self):
             fixture_name: [],
         }

-        if version.parse(sklearn.__version__) >= version.parse("0.21.0"):
-            # str obtained from self.extension._get_sklearn_description(model)
-            fixture_description = (
-                "Pipeline of transforms with a final estimator.\n\nSequentially"
-                " apply a list of transforms and a final estimator.\n"
-                "Intermediate steps of the pipeline must be 'transforms', that "
-                "is, they\nmust implement fit and transform methods.\nThe final"
-                " estimator only needs to implement fit.\nThe transformers in "
-                "the pipeline can be cached using ``memory`` argument.\n\nThe "
-                "purpose of the pipeline is to assemble several steps that can "
-                "be\ncross-validated together while setting different "
-                "parameters.\nFor this, it enables setting parameters of the "
-                "various steps using their\nnames and the parameter name "
-                "separated by a '__', as in the example below.\nA step's "
-                "estimator may be replaced entirely by setting the parameter\n"
-                "with its name to another estimator, or a transformer removed by"
-                " setting\nit to 'passthrough' or ``None``."
-            )
-        else:
-            fixture_description = self.extension._get_sklearn_description(model)
-
+        fixture_description = self._get_expected_pipeline_description(model)
         serialization, new_model = self._serialization_test_helper(
             model,
             X=None,
@@ -1494,9 +1441,7 @@ def test_deserialize_complex_with_defaults(self):
                 "Estimator",
                 sklearn.ensemble.AdaBoostClassifier(
                     sklearn.ensemble.BaggingClassifier(
-                        sklearn.ensemble.GradientBoostingClassifier(
-                            sklearn.neighbors.KNeighborsClassifier()
-                        )
+                        sklearn.ensemble.GradientBoostingClassifier()
                     )
                 ),
             ),
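The nesting dropped here passed a `KNeighborsClassifier` as the first positional argument of `GradientBoostingClassifier`, which is the string-valued `loss` parameter rather than a base estimator, so it never added a real sub-estimator, and recent scikit-learn releases reject the call outright because estimator constructor arguments are keyword-only from 1.0 on. A minimal sketch of the failure mode, purely illustrative and not part of the test:

    import sklearn.ensemble
    import sklearn.neighbors

    # On scikit-learn >= 1.0 this raises a TypeError (constructor arguments are
    # keyword-only); on older versions the estimator merely overwrote `loss`.
    sklearn.ensemble.GradientBoostingClassifier(
        sklearn.neighbors.KNeighborsClassifier()
    )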
@@ -1511,7 +1456,6 @@ def test_deserialize_complex_with_defaults(self):
                 "Estimator__n_estimators": 10,
                 "Estimator__base_estimator__n_estimators": 10,
                 "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
-                "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13,
             }
         else:
             params = {
@@ -1520,7 +1464,6 @@ def test_deserialize_complex_with_defaults(self):
                 "Estimator__n_estimators": 50,
                 "Estimator__base_estimator__n_estimators": 10,
                 "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
-                "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5,
             }
         pipe_adjusted.set_params(**params)
         flow = self.extension.model_to_flow(pipe_adjusted)
@@ -1886,9 +1829,6 @@ def test_run_model_on_fold_classification_3(self):
         class HardNaiveBayes(sklearn.naive_bayes.GaussianNB):
             # class for testing a naive bayes classifier that does not allow soft
             # predictions
-            def __init__(self, priors=None):
-                super(HardNaiveBayes, self).__init__(priors)
-
             def predict_proba(*args, **kwargs):
                 raise AttributeError("predict_proba is not available when " "probability=False")

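Dropping the `__init__` override is safe because `HardNaiveBayes` adds no parameters of its own: the inherited `GaussianNB.__init__` already exposes `priors` (keyword-only on recent scikit-learn), and `get_params`/`clone` work off the inherited constructor signature, which is also why the commit message notes that `priors` was never actually used. A minimal sketch, assuming only that the subclass keeps overriding `predict_proba`:

    import sklearn.naive_bayes

    class HardNaiveBayes(sklearn.naive_bayes.GaussianNB):
        # No __init__ override needed: the parameter is inherited and still discoverable.
        def predict_proba(self, *args, **kwargs):
            raise AttributeError("predict_proba is not available when probability=False")

    clf = HardNaiveBayes(priors=None)
    print("priors" in clf.get_params())  # True, via the inherited signature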
@@ -2059,7 +1999,7 @@ def test__extract_trace_data(self):
         clf = sklearn.model_selection.RandomizedSearchCV(
             sklearn.neural_network.MLPClassifier(),
             param_grid,
-            num_iters,
+            n_iter=num_iters,
         )
         # just run the task on the model (without invoking any fancy extension & openml code)
         train, _ = task.get_train_test_split_indices(0, 0)
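On current scikit-learn, the arguments after `param_distributions` in `RandomizedSearchCV` are keyword-only, so the number of sampled candidates has to be spelled `n_iter=...`; passing it positionally, as the old test did, raises a TypeError. A minimal sketch with an illustrative parameter grid (not the one the test builds):

    import sklearn.model_selection
    import sklearn.neural_network

    param_grid = {"hidden_layer_sizes": [(5,), (10,)]}
    clf = sklearn.model_selection.RandomizedSearchCV(
        sklearn.neural_network.MLPClassifier(),
        param_grid,
        n_iter=2,  # keyword-only on recent scikit-learn releases
    )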
