|
23 | 23 | eval_holdout, eval_iterative_holdout, eval_cv, eval_partial_cv |
24 | 24 | from autosklearn.util import backend |
25 | 25 | from autosklearn.util.pipeline import get_configuration_space |
26 | | -from autosklearn.constants import * |
| 26 | +from autosklearn.constants import BINARY_CLASSIFICATION, \ |
| 27 | + MULTILABEL_CLASSIFICATION,\ |
| 28 | + MULTICLASS_CLASSIFICATION,\ |
| 29 | + REGRESSION |
27 | 30 | from autosklearn.metrics import accuracy, r2, f1_macro |
28 | 31 |
|
29 | 32 | this_directory = os.path.dirname(__file__) |
@@ -1226,6 +1229,112 @@ def test_get_splitter_cv_object(self, te_mock): |
1226 | 1229 | next(cv.split(D.data['Y_train'], D.data['Y_train'] |
1227 | 1230 | , groups=evaluator.resampling_strategy_args['groups'])) |
1228 | 1231 |
|
@unittest.mock.patch.object(TrainEvaluator, "__init__")
def test_holdout_split_size(self, te_mock):
    """Check the train/test sizes produced by the 'holdout' splitter.

    ``TrainEvaluator.__init__`` is patched out so that a bare evaluator
    can be built and only ``get_splitter`` is exercised. Every scenario
    sets the labels, the task type and the resampling arguments on the
    same mocked data manager, then asserts that the returned splitter
    yields exactly one split with the expected number of train and test
    indices.

    Each scenario runs inside its own ``subTest`` so a failure names the
    scenario and does not hide the results of the remaining ones.
    """
    te_mock.return_value = None
    D = unittest.mock.Mock(spec=AbstractDataManager)
    D.feat_type = []

    evaluator = TrainEvaluator()
    evaluator.resampling_strategy = 'holdout'

    # Label fixtures: 10 samples split exactly at train_size=0.7, while
    # 9 samples force the split sizes to be rounded.
    binary_10 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    binary_9 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1])
    large = np.zeros((900, 1))
    multilabel_10 = np.array([[0, 0], [0, 1], [1, 1], [1, 0], [1, 1],
                              [1, 1], [1, 1], [1, 0], [1, 1], [1, 1]])
    multilabel_9 = np.array([[0, 0], [0, 1], [1, 1], [1, 0], [1, 1],
                             [1, 1], [1, 1], [1, 0], [1, 1]])

    # (description, task, Y_train, resampling_strategy_args,
    #  expected number of train samples, expected number of test samples)
    scenarios = [
        ('Exact Ratio', BINARY_CLASSIFICATION, binary_10,
         {'shuffle': True, 'train_size': 0.7}, 7, 3),
        ('No Shuffle', BINARY_CLASSIFICATION, binary_10,
         {'shuffle': False, 'train_size': 0.7}, 7, 3),
        ('Rounded Ratio', BINARY_CLASSIFICATION, binary_9,
         {'shuffle': True, 'train_size': 0.7}, 6, 3),
        ('Rounded Ratio No Shuffle', BINARY_CLASSIFICATION, binary_9,
         {'shuffle': False, 'train_size': 0.7}, 6, 3),
        ('More data', BINARY_CLASSIFICATION, large,
         {'shuffle': True, 'train_size': 0.7}, 630, 270),
        # 'shuffle' is deliberately omitted here so that the splitter's
        # own default for that argument is the one being exercised.
        ('More data, default shuffle', BINARY_CLASSIFICATION, large,
         {'train_size': 0.752}, 676, 224),
        ('Multilabel Exact Ratio', MULTILABEL_CLASSIFICATION,
         multilabel_10,
         {'shuffle': True, 'train_size': 0.7}, 7, 3),
        ('Multilabel No Shuffle', MULTILABEL_CLASSIFICATION,
         multilabel_9,
         {'shuffle': False, 'train_size': 0.7}, 6, 3),
    ]

    for (description, task, y_train, strategy_args,
         n_train, n_test) in scenarios:
        with self.subTest(msg=description):
            D.data = dict(Y_train=y_train)
            D.info = dict(task=task)
            evaluator.resampling_strategy_args = strategy_args
            cv = evaluator.get_splitter(D)

            self.assertEqual(cv.get_n_splits(), 1)
            train_samples, test_samples = next(cv.split(y_train, y_train))
            self.assertEqual(len(train_samples), n_train)
            self.assertEqual(len(test_samples), n_test)
1229 | 1338 |
|
1230 | 1339 | class FunctionsTest(unittest.TestCase): |
1231 | 1340 | def setUp(self): |
|
0 commit comments