|
1 | | -from sklearn.datasets import fetch_openml |
| 1 | +import numpy as np |
| 2 | +from sklearn.datasets import fetch_openml, load_iris |
2 | 3 | from sklearn.metrics import accuracy_score |
3 | 4 | from sklearn.model_selection import GroupKFold, KFold, train_test_split |
4 | 5 |
|
@@ -48,7 +49,7 @@ def test_time(): |
48 | 49 | _test(split_type="time") |
49 | 50 |
|
50 | 51 |
|
51 | | -def test_groups(): |
| 52 | +def test_groups_for_classification_task(): |
52 | 53 | from sklearn.externals._arff import ArffException |
53 | 54 |
|
54 | 55 | try: |
@@ -88,6 +89,35 @@ def test_groups(): |
88 | 89 | automl.fit(X, y, **automl_settings) |
89 | 90 |
|
90 | 91 |
|
def test_groups_for_regression_task():
    """Check that AutoML can fit a regression task when ``groups`` is supplied.

    The iris frame is turned into a regression problem (petal width predicted
    from the other three measurements) and every row receives a random,
    meaningless group label in ``[0, 5)``. The intent is to exercise
    GroupKFold for regression tasks; which splitter is actually used is
    decided inside ``AutoML.fit`` and is not verified here. The test makes no
    assertions, so it passes as long as ``fit`` does not raise.
    """
    # With as_frame=True the "frame" entry is a single pandas DataFrame holding
    # the feature columns together with the target column.
    iris_data = load_iris(as_frame=True)["frame"]

    # Seeded generator so the group assignment is reproducible across runs.
    rng = np.random.default_rng(42)
    iris_data["cluster"] = rng.integers(low=0, high=5, size=iris_data.shape[0])

    automl = AutoML()
    feature_columns = ["sepal length (cm)", "sepal width (cm)", "petal length (cm)"]
    X = iris_data[feature_columns].to_numpy()
    y = iris_data["petal width (cm)"]

    # Split the group labels together with X and y so they stay row-aligned;
    # only the training portions are needed by this test.
    X_train, _, y_train, _, groups_train, _ = train_test_split(
        X, y, iris_data["cluster"], random_state=42
    )

    automl_settings = {
        "max_iter": 5,
        "time_budget": -1,
        "metric": "r2",
        "task": "regression",
        "estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
        "eval_method": "cv",
        "split_type": "uniform",
        "groups": groups_train,
    }
    automl.fit(X_train, y_train, **automl_settings)
| 119 | + |
| 120 | + |
91 | 121 | def test_stratified_groupkfold(): |
92 | 122 | from minio.error import ServerError |
93 | 123 | from sklearn.model_selection import StratifiedGroupKFold |
@@ -203,4 +233,4 @@ def get_n_splits(self, X=None, y=None, groups=None): |
203 | 233 |
|
204 | 234 |
|
205 | 235 | if __name__ == "__main__": |
206 | | - test_groups() |
| 236 | + test_groups_for_classification_task() |
0 commit comments