Skip to content

Commit 8ae3c67

Browse files
committed
ADD unittests for regression, FIX SGD iterative fit for regression
1 parent b5213de commit 8ae3c67

File tree

6 files changed

+134
-32
lines changed

6 files changed

+134
-32
lines changed

autosklearn/pipeline/components/regression/sgd.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,8 @@ def iterative_fit(self, X, y, n_iter=1, refit=False):
4343
self.scaler = None
4444

4545
if self.estimator is None:
46+
self._iterations = 0
47+
4648
self.alpha = float(self.alpha)
4749
self.fit_intercept = self.fit_intercept == 'True'
4850
self.n_iter = int(self.n_iter)
@@ -73,14 +75,16 @@ def iterative_fit(self, X, y, n_iter=1, refit=False):
7375

7476
Y_scaled = self.scaler.transform(y)
7577

76-
self.estimator.n_iter += n_iter
77-
self.estimator.fit(X, Y_scaled)
78+
self.estimator.n_iter = n_iter
79+
self._iterations += n_iter
80+
print(n_iter)
81+
self.estimator.partial_fit(X, Y_scaled)
7882
return self
7983

8084
def configuration_fully_fitted(self):
8185
if self.estimator is None:
8286
return False
83-
return not self.estimator.n_iter < self.n_iter
87+
return not self._iterations < self.n_iter
8488

8589
def predict(self, X):
8690
if self.estimator is None:

test/test_pipeline/components/regression/test_sgd.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@ class SGDComponentTest(unittest.TestCase):
1010
def test_default_configuration(self):
1111
for i in range(10):
1212
predictions, targets = _test_regressor(SGD)
13-
self.assertAlmostEqual(0.092460881802630235,
13+
self.assertAlmostEqual(0.078043497701660636,
1414
sklearn.metrics.r2_score(y_true=targets,
1515
y_pred=predictions))
1616

1717
def test_default_configuration_iterative_fit(self):
1818
for i in range(10):
1919
predictions, targets = _test_regressor_iterative_fit(SGD)
20-
self.assertAlmostEqual(0.092460881802630235,
20+
self.assertAlmostEqual(0.078043497701660636,
2121
sklearn.metrics.r2_score(y_true=targets,
2222
y_pred=predictions))

test/test_pipeline/test_classification.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,8 @@ def get_hyperparameter_search_space(dataset_properties=None):
6565

6666

6767
class SimpleClassificationPipelineTest(unittest.TestCase):
68+
_multiprocess_can_split_ = True
69+
6870
def test_io_dict(self):
6971
classifiers = classification_components._classifiers
7072
for c in classifiers:

test/test_pipeline/test_create_searchspace_util_classification.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import autosklearn.pipeline.create_searchspace_util
1818

1919
class TestCreateClassificationSearchspace(unittest.TestCase):
20+
_multiprocess_can_split_ = True
2021

2122
def test_get_match_array_sparse_and_dense(self):
2223
# preproc is empty

test/test_pipeline/test_regression.py

Lines changed: 122 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
1-
__author__ = 'eggenspk'
2-
31
import copy
42
import resource
3+
import sys
54
import traceback
65
import unittest
76

@@ -26,6 +25,7 @@
2625

2726

2827
class SimpleRegressionPipelineTest(unittest.TestCase):
28+
_multiprocess_can_split_ = True
2929

3030
def test_io_dict(self):
3131
regressors = regression_components._regressors
@@ -133,6 +133,126 @@ def test_configurations(self):
133133
except MemoryError as e:
134134
continue
135135

136+
def test_configurations_signed_data(self):
137+
# Use a limit of ~4GiB
138+
limit = 4000 * 1024 * 1024
139+
resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
140+
141+
cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
142+
dataset_properties={'signed': True})
143+
144+
print(cs)
145+
146+
for i in range(10):
147+
config = cs.sample_configuration()
148+
config._populate_values()
149+
if 'classifier:passive_aggressive:n_iter' in config and \
150+
config[
151+
'classifier:passive_aggressive:n_iter'] is not None:
152+
config._values['classifier:passive_aggressive:n_iter'] = 5
153+
if 'classifier:sgd:n_iter' in config and \
154+
config['classifier:sgd:n_iter'] is not None:
155+
config._values['classifier:sgd:n_iter'] = 5
156+
157+
X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
158+
cls = SimpleRegressionPipeline(config, random_state=1)
159+
print(config)
160+
try:
161+
cls.fit(X_train, Y_train)
162+
X_test_ = X_test.copy()
163+
predictions = cls.predict(X_test)
164+
self.assertIsInstance(predictions, np.ndarray)
165+
predicted_probabiliets = cls.predict(X_test_)
166+
self.assertIsInstance(predicted_probabiliets, np.ndarray)
167+
except ValueError as e:
168+
if "Floating-point under-/overflow occurred at epoch" in \
169+
e.args[0] or \
170+
"removed all features" in e.args[0] or \
171+
"all features are discarded" in e.args[0] or \
172+
"Bug in scikit-learn" in e.args[0]:
173+
continue
174+
else:
175+
print(config)
176+
print(traceback.format_exc())
177+
raise e
178+
except RuntimeWarning as e:
179+
if "invalid value encountered in sqrt" in e.args[0]:
180+
continue
181+
elif "divide by zero encountered in" in e.args[0]:
182+
continue
183+
elif "invalid value encountered in divide" in e.args[0]:
184+
continue
185+
elif "invalid value encountered in true_divide" in e.args[0]:
186+
continue
187+
else:
188+
print(config)
189+
print(traceback.format_exc())
190+
raise e
191+
except UserWarning as e:
192+
if "FastICA did not converge" in e.args[0]:
193+
continue
194+
else:
195+
print(config)
196+
print(traceback.format_exc())
197+
raise e
198+
except MemoryError as e:
199+
continue
200+
201+
def test_configurations_sparse(self):
202+
# Use a limit of ~4GiB
203+
limit = 4000 * 1024 * 1024
204+
resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
205+
206+
cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
207+
dataset_properties={'sparse': True})
208+
print(cs)
209+
for i in range(10):
210+
config = cs.sample_configuration()
211+
config._populate_values()
212+
if 'classifier:passive_aggressive:n_iter' in config and \
213+
config[
214+
'classifier:passive_aggressive:n_iter'] is not None:
215+
config._values['classifier:passive_aggressive:n_iter'] = 5
216+
if 'classifier:sgd:n_iter' in config and \
217+
config['classifier:sgd:n_iter'] is not None:
218+
config._values['classifier:sgd:n_iter'] = 5
219+
220+
print(config)
221+
X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston',
222+
make_sparse=True)
223+
cls = SimpleRegressionPipeline(config, random_state=1)
224+
try:
225+
cls.fit(X_train, Y_train)
226+
predictions = cls.predict(X_test)
227+
except ValueError as e:
228+
if "Floating-point under-/overflow occurred at epoch" in \
229+
e.args[0] or \
230+
"removed all features" in e.args[0] or \
231+
"all features are discarded" in e.args[0]:
232+
continue
233+
else:
234+
print(config)
235+
traceback.print_tb(sys.exc_info()[2])
236+
raise e
237+
except RuntimeWarning as e:
238+
if "invalid value encountered in sqrt" in e.args[0]:
239+
continue
240+
elif "divide by zero encountered in" in e.args[0]:
241+
continue
242+
elif "invalid value encountered in divide" in e.args[0]:
243+
continue
244+
elif "invalid value encountered in true_divide" in e.args[0]:
245+
continue
246+
else:
247+
print(config)
248+
raise e
249+
except UserWarning as e:
250+
if "FastICA did not converge" in e.args[0]:
251+
continue
252+
else:
253+
print(config)
254+
raise e
255+
136256
def test_default_configuration(self):
137257
for i in range(2):
138258
cs = SimpleRegressionPipeline.get_hyperparameter_search_space()

test/test_pipeline/test_textclassification.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)