55import re
66import os
77import sys
8+ from typing import Any
89import unittest
910from distutils .version import LooseVersion
1011from collections import OrderedDict
@@ -73,6 +74,45 @@ def setUp(self):
7374
7475 self .extension = SklearnExtension ()
7576
def _get_expected_pipeline_description(self, model: Any) -> str:
    """Return the description fixture expected for a serialized Pipeline.

    The fixture depends on the installed scikit-learn version:

    * ``>= 1.0``: a hard-coded text that wraps ``fit`` / ``transform`` /
      ``'__'`` / ``'passthrough'`` / ``None`` in single backticks.
    * ``>= 0.21.0`` (and below 1.0): a hard-coded text without those
      backticks and with different line breaks.
    * anything older: whatever
      ``self.extension._get_sklearn_description(model)`` returns.

    Parameters
    ----------
    model : Any
        The model under test; only consulted on the fallback path for
        scikit-learn versions older than 0.21.0.

    Returns
    -------
    str
        The description string the test compares the flow against.
    """
    # Parse the installed version once and reuse it for both thresholds.
    installed = version.parse(sklearn.__version__)

    # NOTE(review): the literals below presumably mirror the text the
    # extension extracts from the Pipeline docstring -- confirm when
    # adding a branch for a new scikit-learn release.
    if installed >= version.parse("1.0"):
        return (
            "Pipeline of transforms with a final estimator.\n \n Sequentially"
            " apply a list of transforms and a final estimator.\n "
            "Intermediate steps of the pipeline must be 'transforms', that "
            "is, they\n must implement `fit` and `transform` methods.\n The final "
            "estimator only needs to implement `fit`.\n The transformers in "
            "the pipeline can be cached using ``memory`` argument.\n \n The "
            "purpose of the pipeline is to assemble several steps that can "
            "be\n cross-validated together while setting different parameters"
            ". For this, it\n enables setting parameters of the various steps"
            " using their names and the\n parameter name separated by a `'__'`,"
            " as in the example below. A step's\n estimator may be replaced "
            "entirely by setting the parameter with its name\n to another "
            "estimator, or a transformer removed by setting it to\n "
            "`'passthrough'` or `None`."
        )

    if installed >= version.parse("0.21.0"):
        return (
            "Pipeline of transforms with a final estimator.\n \n Sequentially"
            " apply a list of transforms and a final estimator.\n "
            "Intermediate steps of the pipeline must be 'transforms', that "
            "is, they\n must implement fit and transform methods.\n The final "
            "estimator only needs to implement fit.\n The transformers in "
            "the pipeline can be cached using ``memory`` argument.\n \n The "
            "purpose of the pipeline is to assemble several steps that can "
            "be\n cross-validated together while setting different parameters"
            ".\n For this, it enables setting parameters of the various steps"
            " using their\n names and the parameter name separated by a '__',"
            " as in the example below.\n A step's estimator may be replaced "
            "entirely by setting the parameter\n with its name to another "
            "estimator, or a transformer removed by setting\n it to "
            "'passthrough' or ``None``."
        )

    # Older releases: no pinned text, so ask the extension directly.
    return self.extension._get_sklearn_description(model)
115+
76116 def _serialization_test_helper (
77117 self , model , X , y , subcomponent_parameters , dependencies_mock_call_count = (1 , 2 )
78118 ):
@@ -398,44 +438,7 @@ def test_serialize_pipeline(self):
398438 "dummy=sklearn.dummy.DummyClassifier)" .format (scaler_name )
399439 )
400440 fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)"
401-
402- if version .parse (sklearn .__version__ ) >= version .parse ("1.0" ):
403- fixture_description = (
404- "Pipeline of transforms with a final estimator.\n \n Sequentially"
405- " apply a list of transforms and a final estimator.\n "
406- "Intermediate steps of the pipeline must be 'transforms', that "
407- "is, they\n must implement `fit` and `transform` methods.\n The final "
408- "estimator only needs to implement `fit`.\n The transformers in "
409- "the pipeline can be cached using ``memory`` argument.\n \n The "
410- "purpose of the pipeline is to assemble several steps that can "
411- "be\n cross-validated together while setting different parameters"
412- ". For this, it\n enables setting parameters of the various steps"
413- " using their names and the\n parameter name separated by a `'__'`,"
414- " as in the example below. A step's\n estimator may be replaced "
415- "entirely by setting the parameter with its name\n to another "
416- "estimator, or a transformer removed by setting it to\n "
417- "`'passthrough'` or `None`."
418- )
419- elif version .parse (sklearn .__version__ ) >= version .parse ("0.21.0" ):
420- fixture_description = (
421- "Pipeline of transforms with a final estimator.\n \n Sequentially"
422- " apply a list of transforms and a final estimator.\n "
423- "Intermediate steps of the pipeline must be 'transforms', that "
424- "is, they\n must implement fit and transform methods.\n The final "
425- "estimator only needs to implement fit.\n The transformers in "
426- "the pipeline can be cached using ``memory`` argument.\n \n The "
427- "purpose of the pipeline is to assemble several steps that can "
428- "be\n cross-validated together while setting different parameters"
429- ".\n For this, it enables setting parameters of the various steps"
430- " using their\n names and the parameter name separated by a '__',"
431- " as in the example below.\n A step's estimator may be replaced "
432- "entirely by setting the parameter\n with its name to another "
433- "estimator, or a transformer removed by setting\n it to "
434- "'passthrough' or ``None``."
435- )
436- else :
437- fixture_description = self .extension ._get_sklearn_description (model )
438-
441+ fixture_description = self ._get_expected_pipeline_description (model )
439442 fixture_structure = {
440443 fixture_name : [],
441444 "sklearn.preprocessing.{}.StandardScaler" .format (scaler_name ): ["scaler" ],
@@ -505,43 +508,7 @@ def test_serialize_pipeline_clustering(self):
505508 "clusterer=sklearn.cluster.{}.KMeans)" .format (scaler_name , cluster_name )
506509 )
507510 fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)"
508-
509- if version .parse (sklearn .__version__ ) >= version .parse ("1.0" ):
510- fixture_description = (
511- "Pipeline of transforms with a final estimator.\n \n Sequentially"
512- " apply a list of transforms and a final estimator.\n "
513- "Intermediate steps of the pipeline must be 'transforms', that "
514- "is, they\n must implement `fit` and `transform` methods.\n The final "
515- "estimator only needs to implement `fit`.\n The transformers in "
516- "the pipeline can be cached using ``memory`` argument.\n \n The "
517- "purpose of the pipeline is to assemble several steps that can "
518- "be\n cross-validated together while setting different parameters"
519- ". For this, it\n enables setting parameters of the various steps"
520- " using their names and the\n parameter name separated by a `'__'`,"
521- " as in the example below. A step's\n estimator may be replaced "
522- "entirely by setting the parameter with its name\n to another "
523- "estimator, or a transformer removed by setting it to\n "
524- "`'passthrough'` or `None`."
525- )
526- elif version .parse (sklearn .__version__ ) >= version .parse ("0.21.0" ):
527- fixture_description = (
528- "Pipeline of transforms with a final estimator.\n \n Sequentially"
529- " apply a list of transforms and a final estimator.\n "
530- "Intermediate steps of the pipeline must be 'transforms', that "
531- "is, they\n must implement fit and transform methods.\n The final "
532- "estimator only needs to implement fit.\n The transformers in "
533- "the pipeline can be cached using ``memory`` argument.\n \n The "
534- "purpose of the pipeline is to assemble several steps that can "
535- "be\n cross-validated together while setting different parameters"
536- ".\n For this, it enables setting parameters of the various steps"
537- " using their\n names and the parameter name separated by a '__',"
538- " as in the example below.\n A step's estimator may be replaced "
539- "entirely by setting the parameter\n with its name to another "
540- "estimator, or a transformer removed by setting\n it to "
541- "'passthrough' or ``None``."
542- )
543- else :
544- fixture_description = self .extension ._get_sklearn_description (model )
511+ fixture_description = self ._get_expected_pipeline_description (model )
545512 fixture_structure = {
546513 fixture_name : [],
547514 "sklearn.preprocessing.{}.StandardScaler" .format (scaler_name ): ["scaler" ],
@@ -699,27 +666,7 @@ def test_serialize_column_transformer_pipeline(self):
699666 fixture_name : [],
700667 }
701668
702- if version .parse (sklearn .__version__ ) >= version .parse ("0.21.0" ):
703- # str obtained from self.extension._get_sklearn_description(model)
704- fixture_description = (
705- "Pipeline of transforms with a final estimator.\n \n Sequentially"
706- " apply a list of transforms and a final estimator.\n "
707- "Intermediate steps of the pipeline must be 'transforms', that "
708- "is, they\n must implement fit and transform methods.\n The final"
709- " estimator only needs to implement fit.\n The transformers in "
710- "the pipeline can be cached using ``memory`` argument.\n \n The "
711- "purpose of the pipeline is to assemble several steps that can "
712- "be\n cross-validated together while setting different "
713- "parameters.\n For this, it enables setting parameters of the "
714- "various steps using their\n names and the parameter name "
715- "separated by a '__', as in the example below.\n A step's "
716- "estimator may be replaced entirely by setting the parameter\n "
717- "with its name to another estimator, or a transformer removed by"
718- " setting\n it to 'passthrough' or ``None``."
719- )
720- else :
721- fixture_description = self .extension ._get_sklearn_description (model )
722-
669+ fixture_description = self ._get_expected_pipeline_description (model )
723670 serialization , new_model = self ._serialization_test_helper (
724671 model ,
725672 X = None ,
@@ -1494,9 +1441,7 @@ def test_deserialize_complex_with_defaults(self):
14941441 "Estimator" ,
14951442 sklearn .ensemble .AdaBoostClassifier (
14961443 sklearn .ensemble .BaggingClassifier (
1497- sklearn .ensemble .GradientBoostingClassifier (
1498- sklearn .neighbors .KNeighborsClassifier ()
1499- )
1444+ sklearn .ensemble .GradientBoostingClassifier ()
15001445 )
15011446 ),
15021447 ),
@@ -1511,7 +1456,6 @@ def test_deserialize_complex_with_defaults(self):
15111456 "Estimator__n_estimators" : 10 ,
15121457 "Estimator__base_estimator__n_estimators" : 10 ,
15131458 "Estimator__base_estimator__base_estimator__learning_rate" : 0.1 ,
1514- "Estimator__base_estimator__base_estimator__loss__n_neighbors" : 13 ,
15151459 }
15161460 else :
15171461 params = {
@@ -1520,7 +1464,6 @@ def test_deserialize_complex_with_defaults(self):
15201464 "Estimator__n_estimators" : 50 ,
15211465 "Estimator__base_estimator__n_estimators" : 10 ,
15221466 "Estimator__base_estimator__base_estimator__learning_rate" : 0.1 ,
1523- "Estimator__base_estimator__base_estimator__loss__n_neighbors" : 5 ,
15241467 }
15251468 pipe_adjusted .set_params (** params )
15261469 flow = self .extension .model_to_flow (pipe_adjusted )
@@ -1886,9 +1829,6 @@ def test_run_model_on_fold_classification_3(self):
18861829 class HardNaiveBayes (sklearn .naive_bayes .GaussianNB ):
18871830 # class for testing a naive bayes classifier that does not allow soft
18881831 # predictions
1889- def __init__ (self , priors = None ):
1890- super (HardNaiveBayes , self ).__init__ (priors )
1891-
18921832 def predict_proba (* args , ** kwargs ):
18931833 raise AttributeError ("predict_proba is not available when " "probability=False" )
18941834
@@ -2059,7 +1999,7 @@ def test__extract_trace_data(self):
20591999 clf = sklearn .model_selection .RandomizedSearchCV (
20602000 sklearn .neural_network .MLPClassifier (),
20612001 param_grid ,
2062- num_iters ,
2002+ n_iter = num_iters ,
20632003 )
20642004 # just run the task on the model (without invoking any fancy extension & openml code)
20652005 train , _ = task .get_train_test_split_indices (0 , 0 )
0 commit comments