Maintenance for tests (train_test_split)

vecxoz · vecxoz · commit 25de6f3842f0 · 2019-06-23T18:59:17.000+03:00
diff --git a/tests/test_func_api_classification_binary.py b/tests/test_func_api_classification_binary.py
@@ -25,7 +25,7 @@
 import scipy.stats as st
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import train_test_split
+# from sklearn.model_selection import train_test_split
 from sklearn.model_selection import StratifiedKFold
 from sklearn.datasets import make_classification
 from sklearn.metrics import accuracy_score
@@ -41,7 +41,27 @@
 
 X, y = make_classification(n_samples = 500, n_features = 5, n_informative = 3, n_redundant = 1, 
                            n_classes = n_classes, flip_y = 0, random_state = 0)
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+
+# Make the train/test split by hand to avoid strange errors, probably related to the testing suite:
+# https://github.com/scikit-learn/scikit-learn/issues/1684
+# https://github.com/scikit-learn/scikit-learn/issues/1704
+# Note: OK on Python 2.7 and 3.4, but errors on Python 3.5 and 3.6
+
+np.random.seed(0)  # fixed seed so the shuffle below is reproducible
+ind = np.arange(500)  # one index per sample; matches n_samples = 500 above
+np.random.shuffle(ind)  # permutes the index array in place
+
+ind_train = ind[:400]  # first 400 shuffled indices -> train (80%)
+ind_test = ind[400:]  # remaining 100 shuffled indices -> test (20%)
+
+X_train = X[ind_train]
+X_test = X[ind_test]
+
+y_train = y[ind_train]
+y_test = y[ind_test]
+
 
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
diff --git a/tests/test_func_api_classification_multiclass.py b/tests/test_func_api_classification_multiclass.py
@@ -22,7 +22,7 @@
 import scipy.stats as st
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import train_test_split
+# from sklearn.model_selection import train_test_split
 from sklearn.model_selection import StratifiedKFold
 from sklearn.datasets import make_classification
 from sklearn.metrics import accuracy_score
@@ -38,7 +38,27 @@
 
 X, y = make_classification(n_samples = 500, n_features = 5, n_informative = 3, n_redundant = 1, 
                            n_classes = n_classes, flip_y = 0, random_state = 0)
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+
+# Make the train/test split by hand to avoid strange errors, probably related to the testing suite:
+# https://github.com/scikit-learn/scikit-learn/issues/1684
+# https://github.com/scikit-learn/scikit-learn/issues/1704
+# Note: OK on Python 2.7 and 3.4, but errors on Python 3.5 and 3.6
+
+np.random.seed(0)  # fixed seed so the shuffle below is reproducible
+ind = np.arange(500)  # one index per sample; matches n_samples = 500 above
+np.random.shuffle(ind)  # permutes the index array in place
+
+ind_train = ind[:400]  # first 400 shuffled indices -> train (80%)
+ind_test = ind[400:]  # remaining 100 shuffled indices -> test (20%)
+
+X_train = X[ind_train]
+X_test = X[ind_test]
+
+y_train = y[ind_train]
+y_test = y[ind_test]
+
 
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
diff --git a/tests/test_func_api_regression.py b/tests/test_func_api_regression.py
@@ -24,7 +24,7 @@
 from scipy.sparse import coo_matrix
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import train_test_split
+# from sklearn.model_selection import train_test_split
 from sklearn.model_selection import KFold
 from sklearn.datasets import load_boston
 from sklearn.metrics import mean_absolute_error
@@ -39,7 +39,27 @@
 
 boston = load_boston()
 X, y = boston.data, boston.target
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+
+# Make the train/test split by hand to avoid strange errors, probably related to the testing suite:
+# https://github.com/scikit-learn/scikit-learn/issues/1684
+# https://github.com/scikit-learn/scikit-learn/issues/1704
+# Note: OK on Python 2.7 and 3.4, but errors on Python 3.5 and 3.6
+
+np.random.seed(0)  # fixed seed so the shuffle below is reproducible
+ind = np.arange(500)  # NOTE(review): load_boston is documented as 506 rows, so rows 500-505 would never be selected - confirm intended
+np.random.shuffle(ind)  # permutes the index array in place
+
+ind_train = ind[:400]  # first 400 shuffled indices -> train
+ind_test = ind[400:]  # remaining 100 shuffled indices -> test
+
+X_train = X[ind_train]
+X_test = X[ind_test]
+
+y_train = y[ind_train]
+y_test = y[ind_test]
+
 
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
diff --git a/tests/test_sklearn_api_classification_binary.py b/tests/test_sklearn_api_classification_binary.py
@@ -24,7 +24,7 @@
 import scipy.stats as st
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import train_test_split
+# from sklearn.model_selection import train_test_split
 # from sklearn.model_selection import KFold
 from sklearn.model_selection import StratifiedKFold
 from sklearn.datasets import make_classification
@@ -48,9 +48,29 @@
                            n_classes=n_classes, flip_y=0,
                            random_state=0)
 
-X_train, X_test, y_train, y_test = train_test_split(X, y,
-                                                    test_size=0.2,
-                                                    random_state=0)
+# X_train, X_test, y_train, y_test = train_test_split(X, y,
+#                                                     test_size=0.2,
+#                                                     random_state=0)
+
+
+# Make the train/test split by hand to avoid strange errors, probably related to the testing suite:
+# https://github.com/scikit-learn/scikit-learn/issues/1684
+# https://github.com/scikit-learn/scikit-learn/issues/1704
+# Note: OK on Python 2.7 and 3.4, but errors on Python 3.5 and 3.6
+
+np.random.seed(0)  # fixed seed so the shuffle below is reproducible
+ind = np.arange(500)  # assumes X has 500 rows (n_samples is not visible in this hunk) - TODO confirm
+np.random.shuffle(ind)  # permutes the index array in place
+
+ind_train = ind[:400]  # first 400 shuffled indices -> train
+ind_test = ind[400:]  # remaining 100 shuffled indices -> test
+
+X_train = X[ind_train]
+X_test = X[ind_test]
+
+y_train = y[ind_train]
+y_test = y[ind_test]
+
 
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
diff --git a/tests/test_sklearn_api_classification_multiclass.py b/tests/test_sklearn_api_classification_multiclass.py
@@ -21,7 +21,7 @@
 import scipy.stats as st
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import train_test_split
+# from sklearn.model_selection import train_test_split
 # from sklearn.model_selection import KFold
 from sklearn.model_selection import StratifiedKFold
 from sklearn.datasets import make_classification
@@ -45,9 +45,29 @@
                            n_classes=n_classes, flip_y=0,
                            random_state=0)
 
-X_train, X_test, y_train, y_test = train_test_split(X, y,
-                                                    test_size=0.2,
-                                                    random_state=0)
+# X_train, X_test, y_train, y_test = train_test_split(X, y,
+#                                                     test_size=0.2,
+#                                                     random_state=0)
+
+
+# Make the train/test split by hand to avoid strange errors, probably related to the testing suite:
+# https://github.com/scikit-learn/scikit-learn/issues/1684
+# https://github.com/scikit-learn/scikit-learn/issues/1704
+# Note: OK on Python 2.7 and 3.4, but errors on Python 3.5 and 3.6
+
+np.random.seed(0)  # fixed seed so the shuffle below is reproducible
+ind = np.arange(500)  # assumes X has 500 rows (n_samples is not visible in this hunk) - TODO confirm
+np.random.shuffle(ind)  # permutes the index array in place
+
+ind_train = ind[:400]  # first 400 shuffled indices -> train
+ind_test = ind[400:]  # remaining 100 shuffled indices -> test
+
+X_train = X[ind_train]
+X_test = X[ind_test]
+
+y_train = y[ind_train]
+y_test = y[ind_test]
+
 
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
diff --git a/tests/test_sklearn_api_regression.py b/tests/test_sklearn_api_regression.py
@@ -25,7 +25,7 @@
 from sklearn.base import RegressorMixin
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import train_test_split
+# from sklearn.model_selection import train_test_split
 from sklearn.model_selection import KFold
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import RandomizedSearchCV
@@ -50,7 +50,27 @@
 
 boston = load_boston()
 X, y = boston.data, boston.target
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+
+# Make the train/test split by hand to avoid strange errors, probably related to the testing suite:
+# https://github.com/scikit-learn/scikit-learn/issues/1684
+# https://github.com/scikit-learn/scikit-learn/issues/1704
+# Note: OK on Python 2.7 and 3.4, but errors on Python 3.5 and 3.6
+
+np.random.seed(0)  # fixed seed so the shuffle below is reproducible
+ind = np.arange(500)  # NOTE(review): load_boston is documented as 506 rows, so rows 500-505 would never be selected - confirm intended
+np.random.shuffle(ind)  # permutes the index array in place
+
+ind_train = ind[:400]  # first 400 shuffled indices -> train
+ind_test = ind[400:]  # remaining 100 shuffled indices -> test
+
+X_train = X[ind_train]
+X_test = X[ind_test]
+
+y_train = y[ind_train]
+y_test = y[ind_test]
+
 
 # -----------------------------------------------------------------------------
 # Scikit-learn INcompatible estimator