Skip to content

Commit 07b8f2e

Browse files
fix datapreprocessing clonability (#925)
* fix datapreprocessing clonability * Fix check is fitted * Feedback from comments * Enforce is fitted to every pipeline stage
1 parent 49ab96d commit 07b8f2e

File tree

13 files changed

+86
-10
lines changed

13 files changed

+86
-10
lines changed

autosklearn/automl.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,11 @@ def predict(self, X, batch_size=None, n_jobs=1):
650650
check_is_fitted(tmp_model.steps[-1][-1])
651651
models = self.models_
652652
except sklearn.exceptions.NotFittedError:
653+
# When training a cross-validation model, self.cv_models_
654+
# will contain the voting classifier/regressor produced by cv.
655+
# In the case of cv, self.models_ contains unfitted models.
656+
# Raising the above exception is a mechanism to detect which
657+
# attribute contains the relevant models for prediction.
653658
try:
654659
check_is_fitted(list(self.cv_models_.values())[0])
655660
models = self.cv_models_

autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def __init__(self, strategy='none', random_state=None):
1414
self.random_state = random_state
1515

1616
def fit(self, X, y=None):
17+
self.fitted_ = True
1718
return self
1819

1920
def transform(self, X):

autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ def __init__(self, random_state=None):
99
pass
1010

1111
def fit(self, X, y=None):
12+
self.fitted_ = True
1213
return self
1314

1415
def transform(self, X):

autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ class DataPreprocessor(AutoSklearnComponent):
2222

2323
def __init__(self, config=None, pipeline=None, dataset_properties=None, include=None,
2424
exclude=None, random_state=None, init_params=None,
25-
categorical_features=None, force_sparse_output=False):
25+
categorical_features=None, force_sparse_output=False,
26+
column_transformer=None):
2627

2728
if pipeline is not None:
2829
raise ValueError("DataPreprocessor's argument 'pipeline' should be None")
@@ -70,6 +71,7 @@ def __init__(self, config=None, pipeline=None, dataset_properties=None, include=
7071
]
7172
if self.config:
7273
self.set_hyperparameters(self.config, init_params=init_params)
74+
self.column_transformer = column_transformer
7375

7476
def fit(self, X, y=None):
7577

@@ -105,7 +107,10 @@ def fit(self, X, y=None):
105107

106108
def transform(self, X):
107109
if self.column_transformer is None:
108-
raise NotImplementedError()
110+
raise ValueError("Cannot call transform on a Datapreprocessor that has not "
111+
"yet been fit. Please check the log files for errors "
112+
"while trying to fit the model."
113+
)
109114
return self.column_transformer.transform(X)
110115

111116
def fit_transform(self, X, y=None):

autosklearn/pipeline/components/data_preprocessing/data_preprocessing_categorical.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None,
8383
cs=cs, dataset_properties=dataset_properties,
8484
exclude=exclude, include=include, pipeline=self.steps)
8585

86-
self.configuration_space_ = cs
8786
return cs
8887

8988
def _get_pipeline_steps(self, dataset_properties=None):

autosklearn/pipeline/components/data_preprocessing/data_preprocessing_numerical.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None,
8585
cs=cs, dataset_properties=dataset_properties,
8686
exclude=exclude, include=include, pipeline=self.steps)
8787

88-
self.configuration_space_ = cs
8988
return cs
9089

9190
def _get_pipeline_steps(self, dataset_properties=None):

autosklearn/pipeline/components/feature_preprocessing/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,6 @@ def get_hyperparameter_search_space(self, dataset_properties=None,
121121
cs.add_configuration_space(name, preprocessor_configuration_space,
122122
parent_hyperparameter=parent_hyperparameter)
123123

124-
self.configuration_space_ = cs
125-
self.dataset_properties_ = dataset_properties
126124
return cs
127125

128126
def transform(self, X):

autosklearn/pipeline/components/feature_preprocessing/densifier.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ def __init__(self, random_state=None):
99
pass
1010

1111
def fit(self, X, y=None):
12+
self.fitted_ = True
1213
return self
1314

1415
def transform(self, X):

autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def __init__(self, random_state):
1212

1313
def fit(self, X, Y=None):
1414
self.preprocessor = 0
15+
self.fitted_ = True
1516
return self
1617

1718
def transform(self, X):

autosklearn/pipeline/components/regression/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,6 @@ def get_hyperparameter_search_space(self, dataset_properties=None,
108108
cs.add_configuration_space(estimator_name, estimator_configuration_space,
109109
parent_hyperparameter=parent_hyperparameter)
110110

111-
self.configuration_space_ = cs
112-
self.dataset_properties_ = dataset_properties
113111
return cs
114112

115113
def estimator_supports_iterative_fit(self):

0 commit comments

Comments
 (0)