1 | | -__author__ = 'eggenspk' |
2 | | - |
3 | 1 | import copy |
4 | 2 | import resource |
| 3 | +import sys |
5 | 4 | import traceback |
6 | 5 | import unittest |
7 | 6 |
26 | 25 |
27 | 26 |
28 | 27 | class SimpleRegressionPipelineTest(unittest.TestCase): |
| 28 | + _multiprocess_can_split_ = True |
29 | 29 |
30 | 30 | def test_io_dict(self): |
31 | 31 | regressors = regression_components._regressors |
@@ -133,6 +133,126 @@ def test_configurations(self): |
133 | 133 | except MemoryError as e: |
134 | 134 | continue |
135 | 135 |
| 136 | + def test_configurations_signed_data(self): |
| 137 | + # Use a limit of ~4GiB |
| 138 | + limit = 4000 * 1024 * 1024 |
| 139 | + resource.setrlimit(resource.RLIMIT_AS, (limit, limit)) |
| 140 | + |
| 141 | + cs = SimpleRegressionPipeline.get_hyperparameter_search_space( |
| 142 | + dataset_properties={'signed': True}) |
| 143 | + |
| 144 | + print(cs) |
| 145 | + |
| 146 | + for i in range(10): |
| 147 | + config = cs.sample_configuration() |
| 148 | + config._populate_values() |
| 149 | +            if 'regressor:passive_aggressive:n_iter' in config and \ |
| 150 | +                    config[ |
| 151 | +                        'regressor:passive_aggressive:n_iter'] is not None: |
| 152 | +                config._values['regressor:passive_aggressive:n_iter'] = 5 |
| 153 | +            if 'regressor:sgd:n_iter' in config and \ |
| 154 | +                    config['regressor:sgd:n_iter'] is not None: |
| 155 | +                config._values['regressor:sgd:n_iter'] = 5 |
| 156 | + |
| 157 | + X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston') |
| 158 | + cls = SimpleRegressionPipeline(config, random_state=1) |
| 159 | + print(config) |
| 160 | + try: |
| 161 | + cls.fit(X_train, Y_train) |
| 162 | + X_test_ = X_test.copy() |
| 163 | + predictions = cls.predict(X_test) |
| 164 | + self.assertIsInstance(predictions, np.ndarray) |
| 165 | +                predictions_copy = cls.predict(X_test_) |
| 166 | +                self.assertIsInstance(predictions_copy, np.ndarray) |
| 167 | + except ValueError as e: |
| 168 | + if "Floating-point under-/overflow occurred at epoch" in \ |
| 169 | + e.args[0] or \ |
| 170 | + "removed all features" in e.args[0] or \ |
| 171 | + "all features are discarded" in e.args[0] or \ |
| 172 | + "Bug in scikit-learn" in e.args[0]: |
| 173 | + continue |
| 174 | + else: |
| 175 | + print(config) |
| 176 | + print(traceback.format_exc()) |
| 177 | + raise e |
| 178 | + except RuntimeWarning as e: |
| 179 | + if "invalid value encountered in sqrt" in e.args[0]: |
| 180 | + continue |
| 181 | + elif "divide by zero encountered in" in e.args[0]: |
| 182 | + continue |
| 183 | + elif "invalid value encountered in divide" in e.args[0]: |
| 184 | + continue |
| 185 | + elif "invalid value encountered in true_divide" in e.args[0]: |
| 186 | + continue |
| 187 | + else: |
| 188 | + print(config) |
| 189 | + print(traceback.format_exc()) |
| 190 | + raise e |
| 191 | + except UserWarning as e: |
| 192 | + if "FastICA did not converge" in e.args[0]: |
| 193 | + continue |
| 194 | + else: |
| 195 | + print(config) |
| 196 | + print(traceback.format_exc()) |
| 197 | + raise e |
| 198 | + except MemoryError as e: |
| 199 | + continue |
| 200 | + |
| 201 | + def test_configurations_sparse(self): |
| 202 | + # Use a limit of ~4GiB |
| 203 | + limit = 4000 * 1024 * 1024 |
| 204 | + resource.setrlimit(resource.RLIMIT_AS, (limit, limit)) |
| 205 | + |
| 206 | + cs = SimpleRegressionPipeline.get_hyperparameter_search_space( |
| 207 | + dataset_properties={'sparse': True}) |
| 208 | + print(cs) |
| 209 | + for i in range(10): |
| 210 | + config = cs.sample_configuration() |
| 211 | + config._populate_values() |
| 212 | +            if 'regressor:passive_aggressive:n_iter' in config and \ |
| 213 | +                    config[ |
| 214 | +                        'regressor:passive_aggressive:n_iter'] is not None: |
| 215 | +                config._values['regressor:passive_aggressive:n_iter'] = 5 |
| 216 | +            if 'regressor:sgd:n_iter' in config and \ |
| 217 | +                    config['regressor:sgd:n_iter'] is not None: |
| 218 | +                config._values['regressor:sgd:n_iter'] = 5 |
| 219 | + |
| 220 | + print(config) |
| 221 | + X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston', |
| 222 | + make_sparse=True) |
| 223 | + cls = SimpleRegressionPipeline(config, random_state=1) |
| 224 | + try: |
| 225 | + cls.fit(X_train, Y_train) |
| 226 | +                self.assertIsInstance(cls.predict(X_test), np.ndarray) |
| 227 | + except ValueError as e: |
| 228 | + if "Floating-point under-/overflow occurred at epoch" in \ |
| 229 | + e.args[0] or \ |
| 230 | + "removed all features" in e.args[0] or \ |
| 231 | + "all features are discarded" in e.args[0]: |
| 232 | + continue |
| 233 | + else: |
| 234 | + print(config) |
| 235 | + traceback.print_tb(sys.exc_info()[2]) |
| 236 | + raise e |
| 237 | + except RuntimeWarning as e: |
| 238 | + if "invalid value encountered in sqrt" in e.args[0]: |
| 239 | + continue |
| 240 | + elif "divide by zero encountered in" in e.args[0]: |
| 241 | + continue |
| 242 | + elif "invalid value encountered in divide" in e.args[0]: |
| 243 | + continue |
| 244 | + elif "invalid value encountered in true_divide" in e.args[0]: |
| 245 | + continue |
| 246 | + else: |
| 247 | + print(config) |
| 248 | + raise e |
| 249 | + except UserWarning as e: |
| 250 | + if "FastICA did not converge" in e.args[0]: |
| 251 | + continue |
| 252 | + else: |
| 253 | + print(config) |
| 254 | + raise e |
| 255 | + |
136 | 256 | def test_default_configuration(self): |
137 | 257 | for i in range(2): |
138 | 258 | cs = SimpleRegressionPipeline.get_hyperparameter_search_space() |