From c884e775d53d1cc3b7781d296fd57225928dc0cd Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 7 Jan 2023 14:57:21 +0100 Subject: [PATCH 01/17] update for scikit-learn 1.2 --- mlinsights/__init__.py | 2 +- ... _piecewise_tree_regression_common120.pyx} | 4 ++-- mlinsights/mlmodel/interval_regressor.py | 2 +- .../mlmodel/piecewise_tree_regression.py | 2 +- .../piecewise_tree_regression_criterion.pyx | 20 +++++++++-------- ...ecewise_tree_regression_criterion_fast.pyx | 22 ++++++++++--------- ...ewise_tree_regression_criterion_linear.pyx | 19 +++++++++------- requirements-win.txt | 2 +- requirements.txt | 2 +- setup.py | 6 ++--- 10 files changed, 44 insertions(+), 37 deletions(-) rename mlinsights/mlmodel/{_piecewise_tree_regression_common024.pyx => _piecewise_tree_regression_common120.pyx} (99%) diff --git a/mlinsights/__init__.py b/mlinsights/__init__.py index 33940ed9..8b2e010b 100644 --- a/mlinsights/__init__.py +++ b/mlinsights/__init__.py @@ -4,7 +4,7 @@ @brief Module *mlinsights*. Look for insights for machine learned models. """ -__version__ = "0.3.649" +__version__ = "0.4.649" __author__ = "Xavier Dupré" __github__ = "https://github.com/sdpython/mlinsights" __url__ = "http://www.xavierdupre.fr/app/mlinsights/helpsphinx/index.html" diff --git a/mlinsights/mlmodel/_piecewise_tree_regression_common024.pyx b/mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx similarity index 99% rename from mlinsights/mlmodel/_piecewise_tree_regression_common024.pyx rename to mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx index 28bd4b32..80b2064f 100644 --- a/mlinsights/mlmodel/_piecewise_tree_regression_common024.pyx +++ b/mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx @@ -229,8 +229,8 @@ def _test_criterion_init(Criterion criterion, SIZE_t start, SIZE_t end): "Test purposes. Methods cannot be directly called from python." criterion.init(y, - &sample_weight[0], weighted_n_samples, - &samples[0], start, end) + sample_weight, weighted_n_samples, + samples, start, end) def _test_criterion_check(Criterion criterion): diff --git a/mlinsights/mlmodel/interval_regressor.py b/mlinsights/mlmodel/interval_regressor.py index c01afb9e..fc5e856f 100644 --- a/mlinsights/mlmodel/interval_regressor.py +++ b/mlinsights/mlmodel/interval_regressor.py @@ -87,7 +87,7 @@ def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha): rnd = numpy.random.randint(0, X.shape[0] - 1, new_size) Xr = X[rnd] yr = y[rnd] - sr = sample_weight[rnd] if sample_weight else None + sr = sample_weight[rnd] if sample_weight is not None else None return est.fit(Xr, yr, sr) self.estimators_ = \ diff --git a/mlinsights/mlmodel/piecewise_tree_regression.py b/mlinsights/mlmodel/piecewise_tree_regression.py index b10a4b82..3338db75 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression.py +++ b/mlinsights/mlmodel/piecewise_tree_regression.py @@ -132,7 +132,7 @@ def _fit_reglin(self, X, y, sample_weight): if len(ys.shape) == 1: ys = ys[:, numpy.newaxis] ys = ys.copy() - ws = sample_weight[ind].copy() if sample_weight else None + ws = sample_weight[ind].copy() if sample_weight is not None else None dec = LinearRegressorCriterion.create(xs, ys, ws) dec.node_beta(self.betas_[i, :]) diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx index 2441ec60..3c76b92b 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx @@ -62,8 +62,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): self.sample_i = NULL # Criterion interface - self.sample_weight = NULL - self.samples = NULL + self.sample_weight = None + self.samples_indices = None # allocation if self.sample_w == NULL: @@ -74,8 +74,9 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): self.sample_i = calloc(n_samples, sizeof(SIZE_t)) cdef int init(self, const DOUBLE_t[:, ::1] y, - DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, + const DOUBLE_t[:] sample_weight, + double weighted_n_samples, + const SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end) nogil except -1: """ This function is overwritten to check *y* and *X* size are the same. @@ -86,12 +87,13 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): if y.shape[1] != 1: raise ValueError("This class only works for a single vector.") return self.init_with_X(y, sample_weight, weighted_n_samples, - samples, start, end) + sample_indices, start, end) cdef int init_with_X(self, const DOUBLE_t[:, ::1] y, - DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, + const DOUBLE_t[:] sample_weight, + double weighted_n_samples, + const SIZE_t[:] samples_indices, SIZE_t start, SIZE_t end) nogil except -1: """ Initializes the criterion. @@ -125,9 +127,9 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): # Filling accumulators. for ki in range(start, end): - ks = samples[ki] + ks = samples_indices[ki] self.sample_i[ki] = ks - self.sample_w[ki] = sample_weight[ks] if sample_weight else 1. + self.sample_w[ki] = sample_weight[ks] if sample_weight is not None else 1. self.sample_wy[ki] = self.sample_w[ki] * y[ks, 0] self.sample_sum_wy += y[ks, 0] * self.sample_w[ki] self.sample_sum_w += self.sample_w[ki] diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx index 5c7965ec..7aa519fc 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx @@ -55,8 +55,8 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): self.sample_wy2_left = NULL # Criterion interface - self.sample_weight = NULL - self.samples = NULL + self.sample_weight = None + self.samples_indices = None # allocations if self.sample_w_left == NULL: @@ -67,8 +67,9 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): self.sample_wy2_left = calloc(n_samples, sizeof(DOUBLE_t)) cdef int init(self, const DOUBLE_t[:, ::1] y, - DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, + const DOUBLE_t[:] sample_weight, + double weighted_n_samples, + const SIZE_t[:] samples, SIZE_t start, SIZE_t end) nogil except -1: """ This function is overwritten to check *y* and *X* size are the same. @@ -83,8 +84,9 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): cdef int init_with_X(self, const DOUBLE_t[:, ::1] y, - DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, + const DOUBLE_t[:] sample_weight, + double weighted_n_samples, + const SIZE_t[:] samples_indices, SIZE_t start, SIZE_t end) nogil except -1: """ Initializes the criterion. @@ -122,15 +124,15 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): # Left side. for ki in range(start, start+1): - ks = samples[ki] - w = sample_weight[ks] if sample_weight else 1. + ks = samples_indices[ki] + w = sample_weight[ks] if sample_weight is not None else 1. y_ = y[ks, 0] self.sample_w_left[ki] = w self.sample_wy_left[ki] = w * y_ self.sample_wy2_left[ki] = w * y_ * y_ for ki in range(start+1, end): - ks = samples[ki] - w = sample_weight[ks] if sample_weight else 1. + ks = samples_indices[ki] + w = sample_weight[ks] if sample_weight is not None else 1. y_ = y[ks, 0] self.sample_w_left[ki] = self.sample_w_left[ki-1] + w self.sample_wy_left[ki] = self.sample_wy_left[ki-1] + w * y_ diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx index 245fcb07..3dcc491d 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx @@ -90,8 +90,8 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): self.sample_work = NULL # Criterion interface - self.sample_weight = NULL - self.samples = NULL + self.sample_weight = None + self.samples_indices = None # allocation if self.sample_w == NULL: @@ -156,12 +156,14 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): return obj cdef int init(self, const DOUBLE_t[:, ::1] y, - DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, + const DOUBLE_t[:] sample_weight, + double weighted_n_samples, + const SIZE_t[:] samples, SIZE_t start, SIZE_t end) nogil except -1: """ This function is overwritten to check *y* and *X* size are the same. This API changed in 0.21. + It changed again in scikit-learn 1.2 to replace `DOUBLE_t*` into `DOUBLE[:]`. """ if y.shape[0] != self.n_samples: raise ValueError("n_samples={} -- y.shape={}".format(self.n_samples, y.shape)) @@ -174,8 +176,9 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): cdef int init_with_X(self, const DOUBLE_t[:, ::1] X, const DOUBLE_t[:, ::1] y, - DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, + const DOUBLE_t[:] sample_weight, + double weighted_n_samples, + const SIZE_t[:] samples, SIZE_t start, SIZE_t end) nogil except -1: """ Initializes the criterion. @@ -209,9 +212,9 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): # Filling accumulators. idx = start * self.nbvar for ki in range(start, end): - ks = samples[ki] + ks = samples_indices[ki] self.sample_i[ki] = ks - self.sample_w[ki] = sample_weight[ks] if sample_weight else 1. + self.sample_w[ki] = sample_weight[ks] if sample_weight is not None else 1. self.sample_wy[ki] = self.sample_w[ki] * y[ks, 0] self.sample_y[ki] = y[ks, 0] self.sample_sum_wy += y[ks, 0] * self.sample_w[ki] diff --git a/requirements-win.txt b/requirements-win.txt index bbed5e0d..b04b2c7e 100644 --- a/requirements-win.txt +++ b/requirements-win.txt @@ -13,5 +13,5 @@ psutil pylint>=2.14.0 pymyinstall pyshp -scikit-learn>=1.0 +scikit-learn>=1.2 threadpoolctl diff --git a/requirements.txt b/requirements.txt index 9580f4b0..76903d60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ pydata-sphinx-theme pyquickhelper>=1.10 pyquicksetup pylint>=2.14.0 -scikit-learn>=1.0 +scikit-learn>=1.2 scipy seaborn skl2onnx diff --git a/setup.py b/setup.py index 7e0ea026..71333521 100644 --- a/setup.py +++ b/setup.py @@ -55,9 +55,9 @@ def get_extensions(): extensions = ["direct_blas_lapack"] spl = sklearn.__version__.split('.') vskl = (int(spl[0]), int(spl[1])) - if vskl >= (0, 24): + if vskl >= (1, 2): extensions.append(("_piecewise_tree_regression_common", - "_piecewise_tree_regression_common024")) + "_piecewise_tree_regression_common120")) else: raise ImportError("Cannot build mlisinghts for scikit-learn<1.0.") @@ -122,7 +122,7 @@ def get_extensions(): package_dir=package_dir, package_data=package_data, setup_requires=["pyquicksetup", 'cython', 'scipy', 'scikit-learn'], - install_requires=['cython', 'scikit-learn>=1.0', 'pandas', 'scipy', + install_requires=['cython', 'scikit-learn>=1.2', 'pandas', 'scipy', 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], ext_modules=ext_modules, # cythonize(ext_modules), ) From 3d1e4e532b4368ec33e07144ae9f0b44111942f0 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 8 Jan 2023 11:53:41 +0100 Subject: [PATCH 02/17] fix compilation issues, updates API --- .gitignore | 2 +- _unittests/ut_mlmodel/test_anmf_predictor.py | 16 ++--- .../ut_mlmodel/test_categories_to_integers.py | 14 ++-- .../ut_mlmodel/test_classification_kmeans.py | 12 ++-- .../test_decision_tree_logistic_regression.py | 18 ++--- .../ut_mlmodel/test_extended_features.py | 65 ++++++++++--------- .../ut_mlmodel/test_piecewise_classifier.py | 15 +++-- .../ut_mlmodel/test_piecewise_regressor.py | 17 +++-- .../ut_mlmodel/test_quantile_mlpregression.py | 18 +++-- .../ut_mlmodel/test_quantile_regression.py | 17 +++-- .../ut_mlmodel/test_transfer_transformer.py | 26 ++++---- .../ut_mlmodel/test_tsne_predictable.py | 10 +-- mlinsights/mlmodel/__init__.py | 12 +++- mlinsights/mlmodel/extended_features.py | 4 +- .../piecewise_tree_regression_criterion.pyx | 6 +- ...ecewise_tree_regression_criterion_fast.pyx | 8 +-- ...ewise_tree_regression_criterion_linear.pyx | 15 ++--- mlinsights/mlmodel/sklearn_testing.py | 6 +- 18 files changed, 151 insertions(+), 130 deletions(-) diff --git a/.gitignore b/.gitignore index 5dbb4522..6703d4c3 100644 --- a/.gitignore +++ b/.gitignore @@ -80,7 +80,7 @@ build/ *.pidb *.log *.scc -*.pyd +*.so # Visual C++ cache files ipch/ diff --git a/_unittests/ut_mlmodel/test_anmf_predictor.py b/_unittests/ut_mlmodel/test_anmf_predictor.py index ae5262df..e7493f39 100644 --- a/_unittests/ut_mlmodel/test_anmf_predictor.py +++ b/_unittests/ut_mlmodel/test_anmf_predictor.py @@ -45,8 +45,8 @@ def test_anmf_predictor_sparse(self): exp = mod.estimator_nmf_.inverse_transform( mod.estimator_nmf_.transform(mat)) got = mod.predict(mat) - sc1 = mean_squared_error(mat.todense(), exp) - sc2 = mean_squared_error(mat.todense(), got) + sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp) + sc2 = mean_squared_error(numpy.asarray(mat.todense()), got) self.assertGreater(sc1, sc2) mat2 = numpy.array([[1, 1, 1, 1]], dtype=numpy.float64) @@ -68,8 +68,8 @@ def test_anmf_predictor_sparse_sparse(self): exp = mod.estimator_nmf_.inverse_transform( mod.estimator_nmf_.transform(mat)) got = mod.predict(mat) - sc1 = mean_squared_error(mat.todense(), exp) - sc2 = mean_squared_error(mat.todense(), got) + sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp) + sc2 = mean_squared_error(numpy.asarray(mat.todense()), got) self.assertGreater(sc1, sc2) mat2 = numpy.array([[1, 1, 1, 1]], dtype=numpy.float64) @@ -77,8 +77,8 @@ def test_anmf_predictor_sparse_sparse(self): exp2 = mod.estimator_nmf_.inverse_transform( mod.estimator_nmf_.transform(mat2)) got2 = mod.predict(mat2) - sc1 = mean_squared_error(mat2.todense(), exp2) - sc2 = mean_squared_error(mat2.todense(), got2) + sc1 = mean_squared_error(numpy.asarray(mat2.todense()), exp2) + sc2 = mean_squared_error(numpy.asarray(mat2.todense()), got2) self.assertGreater(sc1, sc2) def test_anmf_predictor_positive(self): @@ -118,8 +118,8 @@ def test_anmf_predictor_positive_sparse(self): exp = mod.estimator_nmf_.inverse_transform( mod.estimator_nmf_.transform(mat)) got = mod.predict(mat) - sc1 = mean_squared_error(mat.todense(), exp) - sc2 = mean_squared_error(mat.todense(), got) + sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp) + sc2 = mean_squared_error(numpy.asarray(mat.todense()), got) self.assertGreater(sc1, sc2) mx = numpy.min(got) self.assertGreater(mx, 0) diff --git a/_unittests/ut_mlmodel/test_categories_to_integers.py b/_unittests/ut_mlmodel/test_categories_to_integers.py index d975c1e7..512d25ce 100644 --- a/_unittests/ut_mlmodel/test_categories_to_integers.py +++ b/_unittests/ut_mlmodel/test_categories_to_integers.py @@ -13,7 +13,9 @@ from pyquickhelper.texthelper import compare_module_version from mlinsights.mlmodel import CategoriesToIntegers from mlinsights.mlmodel import ( - test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv) + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv) skipped_warnings = (ConvergenceWarning, UserWarning, FitFailedWarning) @@ -103,12 +105,12 @@ def test_categories_to_integers_pickle(self): data = os.path.join(os.path.abspath( os.path.dirname(__file__)), "data", "adult_set.txt") df = pandas.read_csv(data, sep="\t") - test_sklearn_pickle(lambda: CategoriesToIntegers(skip_errors=True), df) + run_test_sklearn_pickle(lambda: CategoriesToIntegers(skip_errors=True), df) @ignore_warnings(skipped_warnings) def test_categories_to_integers_clone(self): self.maxDiff = None - test_sklearn_clone(lambda: CategoriesToIntegers()) + run_test_sklearn_clone(lambda: CategoriesToIntegers()) @ignore_warnings(skipped_warnings) def test_categories_to_integers_grid_search(self): @@ -119,19 +121,19 @@ def test_categories_to_integers_grid_search(self): y = df['income'] # pylint: disable=E1136 pipe = make_pipeline(CategoriesToIntegers(), LogisticRegression()) - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: pipe, df), ValueError) if (compare_module_version(sklver, "0.24") >= 0 and # pylint: disable=R1716 compare_module_version(pandas.__version__, "1.3") < 0): self.assertRaise( - lambda: test_sklearn_grid_search_cv( + lambda: run_test_sklearn_grid_search_cv( lambda: pipe, X, y, categoriestointegers__single=[True, False]), ValueError, "Unable to find category value") pipe = make_pipeline(CategoriesToIntegers(), Imputer(strategy='most_frequent'), LogisticRegression(n_jobs=1)) try: - res = test_sklearn_grid_search_cv( + res = run_test_sklearn_grid_search_cv( lambda: pipe, X, y, categoriestointegers__single=[True, False], categoriestointegers__skip_errors=[True]) except AttributeError as e: diff --git a/_unittests/ut_mlmodel/test_classification_kmeans.py b/_unittests/ut_mlmodel/test_classification_kmeans.py index 7ae4e8f5..44dff583 100644 --- a/_unittests/ut_mlmodel/test_classification_kmeans.py +++ b/_unittests/ut_mlmodel/test_classification_kmeans.py @@ -15,8 +15,8 @@ from pyquickhelper.pycode import ExtTestCase from pyquickhelper.texthelper import compare_module_version from mlinsights.mlmodel import ( - ClassifierAfterKMeans, test_sklearn_pickle, test_sklearn_clone, - test_sklearn_grid_search_cv) + ClassifierAfterKMeans, run_test_sklearn_pickle, + run_test_sklearn_clone, run_test_sklearn_grid_search_cv) class TestClassifierAfterKMeans(ExtTestCase): @@ -58,7 +58,7 @@ def test_classification_kmeans_pickle(self): iris = datasets.load_iris() X, y = iris.data, iris.target try: - test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y) + run_test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y) except AttributeError as e: if compare_module_version(sklver, "0.24") < 0: return @@ -66,16 +66,16 @@ def test_classification_kmeans_pickle(self): def test_classification_kmeans_clone(self): self.maxDiff = None - test_sklearn_clone(lambda: ClassifierAfterKMeans()) + run_test_sklearn_clone(lambda: ClassifierAfterKMeans()) @ignore_warnings(category=ConvergenceWarning) def test_classification_kmeans_grid_search(self): iris = datasets.load_iris() X, y = iris.data, iris.target - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: ClassifierAfterKMeans(), X, y), ValueError) try: - res = test_sklearn_grid_search_cv( + res = run_test_sklearn_grid_search_cv( lambda: ClassifierAfterKMeans(), X, y, c_n_clusters=[2, 3]) except AttributeError as e: diff --git a/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py b/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py index 1fb035c8..ab34501b 100644 --- a/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py +++ b/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py @@ -12,9 +12,8 @@ from sklearn.tree import DecisionTreeClassifier from pyquickhelper.pycode import ExtTestCase from mlinsights.mlmodel import ( - test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv, - DecisionTreeLogisticRegression -) + run_test_sklearn_pickle, run_test_sklearn_clone, + run_test_sklearn_grid_search_cv, DecisionTreeLogisticRegression) from mlinsights.mltree import predict_leaves @@ -63,22 +62,23 @@ def test_classifier_pickle(self): X = random(100) Y = X > 0.5 # pylint: disable=W0143 X = X.reshape((100, 1)) # pylint: disable=E1101 - test_sklearn_pickle(lambda: LogisticRegression(), X, Y) - test_sklearn_pickle(lambda: DecisionTreeLogisticRegression( + run_test_sklearn_pickle(lambda: LogisticRegression(), X, Y) + run_test_sklearn_pickle(lambda: DecisionTreeLogisticRegression( fit_improve_algo=None), X, Y) def test_classifier_clone(self): - test_sklearn_clone( + run_test_sklearn_clone( lambda: DecisionTreeLogisticRegression(fit_improve_algo=None)) def test_classifier_grid_search(self): X = random(100) Y = X > 0.5 # pylint: disable=W0143 X = X.reshape((100, 1)) # pylint: disable=E1101 - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: DecisionTreeLogisticRegression(fit_improve_algo=None), X, Y), ValueError) - res = test_sklearn_grid_search_cv(lambda: DecisionTreeLogisticRegression(fit_improve_algo=None), - X, Y, max_depth=[2, 3]) + res = run_test_sklearn_grid_search_cv( + lambda: DecisionTreeLogisticRegression(fit_improve_algo=None), + X, Y, max_depth=[2, 3]) self.assertIn('model', res) self.assertIn('score', res) self.assertGreater(res['score'], 0) diff --git a/_unittests/ut_mlmodel/test_extended_features.py b/_unittests/ut_mlmodel/test_extended_features.py index 79afdf09..123285db 100644 --- a/_unittests/ut_mlmodel/test_extended_features.py +++ b/_unittests/ut_mlmodel/test_extended_features.py @@ -41,13 +41,13 @@ def test_polynomial_features(self): poly = PolynomialFeatures(deg, include_bias=True) P_test = poly.fit_transform(X) self.assertEqual(P_test, P) - names = poly.get_feature_names() + names = poly.get_feature_names_out() ext = ExtendedFeatures(poly_degree=deg) e_test = ext.fit_transform(X) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test, P) self.assertEqual(P_test.shape, e_test.shape) @@ -74,13 +74,13 @@ def test_polynomial_features_slow(self): poly = PolynomialFeatures(deg, include_bias=True) P_test = poly.fit_transform(X) self.assertEqual(P_test, P) - names = poly.get_feature_names() + names = poly.get_feature_names_out() ext = ExtendedFeatures(kind='poly-slow', poly_degree=deg) e_test = ext.fit_transform(X) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test, P) self.assertEqual(P_test.shape, e_test.shape) @@ -109,7 +109,7 @@ def test_polynomial_features_nobias_ionly(self): interaction_only=True) P_test = poly.fit_transform(X) - names = poly.get_feature_names() + names = poly.get_feature_names_out() self.assertEqual(P_test, P[:, fc]) ext = ExtendedFeatures(poly_degree=deg, @@ -117,9 +117,9 @@ def test_polynomial_features_nobias_ionly(self): poly_interaction_only=True) e_test = ext.fit_transform(X) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test, P[:, fc]) self.assertEqual(P_test.shape, e_test.shape) @@ -148,7 +148,7 @@ def test_polynomial_features_nobias_ionly_slow(self): interaction_only=True) P_test = poly.fit_transform(X) - names = poly.get_feature_names() + names = poly.get_feature_names_out() self.assertEqual(P_test, P[:, fc]) ext = ExtendedFeatures(kind="poly-slow", poly_degree=deg, @@ -156,9 +156,9 @@ def test_polynomial_features_nobias_ionly_slow(self): poly_interaction_only=True) e_test = ext.fit_transform(X) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test, P[:, fc]) self.assertEqual(P_test.shape, e_test.shape) @@ -187,7 +187,7 @@ def test_polynomial_features_bias_ionly(self): interaction_only=True) P_test = poly.fit_transform(X) - names = poly.get_feature_names() + names = poly.get_feature_names_out() self.assertEqual(P_test, P[:, fc]) ext = ExtendedFeatures(poly_degree=deg, @@ -195,9 +195,9 @@ def test_polynomial_features_bias_ionly(self): poly_interaction_only=True) e_test = ext.fit_transform(X) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test, P[:, fc]) self.assertEqual(P_test.shape, e_test.shape) @@ -226,7 +226,7 @@ def test_polynomial_features_bias_ionly_slow(self): interaction_only=True) P_test = poly.fit_transform(X) - names = poly.get_feature_names() + names = poly.get_feature_names_out() self.assertEqual(P_test, P[:, fc]) ext = ExtendedFeatures(kind="poly-slow", poly_degree=deg, @@ -234,9 +234,9 @@ def test_polynomial_features_bias_ionly_slow(self): poly_interaction_only=True) e_test = ext.fit_transform(X) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test, P[:, fc]) self.assertEqual(P_test.shape, e_test.shape) @@ -263,15 +263,15 @@ def test_polynomial_features_nobias(self): poly = PolynomialFeatures(deg, include_bias=False) P_test = poly.fit_transform(X) self.assertEqual(P_test, P[:, 1:]) - names = poly.get_feature_names() + names = poly.get_feature_names_out() ext = ExtendedFeatures(poly_degree=deg, poly_include_bias=False) e_test = ext.fit_transform(X) self.assertEqual(P_test, P[:, 1:]) - e_names = ext.get_feature_names() + e_names = ext.get_feature_names_out() self.assertEqual(len(names), len(e_names)) - self.assertEqual(names, e_names) + self.assertEqual(list(names), list(e_names)) self.assertEqual(P_test.shape, e_test.shape) self.assertEqual(P_test, e_test) @@ -280,20 +280,20 @@ def test_polynomial_features_bigger(self): for deg in (1, 2, 3, 4): poly = PolynomialFeatures(deg, include_bias=True) X_sk = poly.fit_transform(X) - names_sk = poly.get_feature_names() + names_sk = poly.get_feature_names_out() ext = ExtendedFeatures(poly_degree=deg) X_ext = ext.fit_transform(X) inames = ["x%d" % i for i in range(0, X.shape[1])] - names_ext = ext.get_feature_names(inames) + names_ext = ext.get_feature_names_out(inames) self.assertEqual(len(names_sk), len(names_ext)) - self.assertEqual(names_sk, names_ext) + self.assertEqual(list(names_sk), list(names_ext)) - names_ext = ext.get_feature_names() + names_ext = ext.get_feature_names_out() self.assertEqual(len(names_sk), len(names_ext)) - self.assertEqual(names_sk, names_ext) + self.assertEqual(list(names_sk), list(names_ext)) self.assertEqual(X_sk.shape, X_ext.shape) self.assertEqual(X_sk, X_ext) @@ -304,21 +304,21 @@ def test_polynomial_features_bigger_ionly(self): poly = PolynomialFeatures(deg, include_bias=True, interaction_only=True) X_sk = poly.fit_transform(X) - names_sk = poly.get_feature_names() + names_sk = poly.get_feature_names_out() ext = ExtendedFeatures(poly_degree=deg, poly_include_bias=True, poly_interaction_only=True) X_ext = ext.fit_transform(X) inames = ["x%d" % i for i in range(0, X.shape[1])] - names_ext = ext.get_feature_names(inames) + names_ext = ext.get_feature_names_out(inames) self.assertEqual(len(names_sk), len(names_ext)) - self.assertEqual(names_sk, names_ext) + self.assertEqual(list(names_sk), list(names_ext)) - names_ext = ext.get_feature_names() + names_ext = ext.get_feature_names_out() self.assertEqual(len(names_sk), len(names_ext)) - self.assertEqual(names_sk, names_ext) + self.assertEqual(list(names_sk), list(names_ext)) self.assertEqual(X_sk.shape, X_ext.shape) self.assertEqual(X_sk, X_ext) @@ -357,7 +357,8 @@ def polynomial_features_csr_X_zero_row(self, zero_row_index, deg, interaction_on poly = PolynomialFeatures(degree=deg, include_bias=False, interaction_only=interaction_only) poly.fit(X) - self.assertEqual(poly.get_feature_names(), est.get_feature_names()) + self.assertEqual(list(poly.get_feature_names_out()), + list(est.get_feature_names_out())) Xt_dense1 = est.fit_transform(X) Xt_dense2 = poly.fit_transform(X) self.assertEqual(Xt_dense1, Xt_dense2) diff --git a/_unittests/ut_mlmodel/test_piecewise_classifier.py b/_unittests/ut_mlmodel/test_piecewise_classifier.py index 7380d8aa..0d699d1e 100644 --- a/_unittests/ut_mlmodel/test_piecewise_classifier.py +++ b/_unittests/ut_mlmodel/test_piecewise_classifier.py @@ -8,7 +8,10 @@ import pandas from sklearn.linear_model import LogisticRegression from pyquickhelper.pycode import ExtTestCase, ignore_warnings -from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv +from mlinsights.mlmodel import ( + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv) from mlinsights.mlmodel.piecewise_estimator import PiecewiseClassifier @@ -161,19 +164,19 @@ def test_piecewise_classifier_pickle(self): X = random(100) Y = X > 0.5 # pylint: disable=W0143 X = X.reshape((100, 1)) # pylint: disable=E1101 - test_sklearn_pickle(lambda: LogisticRegression(), X, Y) - test_sklearn_pickle(lambda: PiecewiseClassifier(), X, Y) + run_test_sklearn_pickle(lambda: LogisticRegression(), X, Y) + run_test_sklearn_pickle(lambda: PiecewiseClassifier(), X, Y) def test_piecewise_classifier_clone(self): - test_sklearn_clone(lambda: PiecewiseClassifier(verbose=True)) + run_test_sklearn_clone(lambda: PiecewiseClassifier(verbose=True)) def test_piecewise_classifier_grid_search(self): X = random(100) Y = X > 0.5 # pylint: disable=W0143 X = X.reshape((100, 1)) # pylint: disable=E1101 - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: PiecewiseClassifier(), X, Y), ValueError) - res = test_sklearn_grid_search_cv(lambda: PiecewiseClassifier(), + res = run_test_sklearn_grid_search_cv(lambda: PiecewiseClassifier(), X, Y, binner__max_depth=[2, 3]) self.assertIn('model', res) self.assertIn('score', res) diff --git a/_unittests/ut_mlmodel/test_piecewise_regressor.py b/_unittests/ut_mlmodel/test_piecewise_regressor.py index c39d4974..b4c29894 100644 --- a/_unittests/ut_mlmodel/test_piecewise_regressor.py +++ b/_unittests/ut_mlmodel/test_piecewise_regressor.py @@ -10,7 +10,10 @@ from sklearn.datasets import make_regression from sklearn.tree import DecisionTreeRegressor from pyquickhelper.pycode import ExtTestCase, ignore_warnings -from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv +from mlinsights.mlmodel import ( + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv) from mlinsights.mlmodel.piecewise_estimator import PiecewiseRegressor @@ -128,11 +131,11 @@ def test_piecewise_regressor_pickle(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - test_sklearn_pickle(lambda: LinearRegression(), X, Y) - test_sklearn_pickle(lambda: PiecewiseRegressor(), X, Y) + run_test_sklearn_pickle(lambda: LinearRegression(), X, Y) + run_test_sklearn_pickle(lambda: PiecewiseRegressor(), X, Y) def test_piecewise_regressor_clone(self): - test_sklearn_clone(lambda: PiecewiseRegressor(verbose=True)) + run_test_sklearn_clone(lambda: PiecewiseRegressor(verbose=True)) def test_piecewise_regressor_grid_search(self): X = random(100) @@ -141,10 +144,10 @@ def test_piecewise_regressor_grid_search(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: PiecewiseRegressor(), X, Y), ValueError) - res = test_sklearn_grid_search_cv(lambda: PiecewiseRegressor(), - X, Y, binner__max_depth=[2, 3]) + res = run_test_sklearn_grid_search_cv(lambda: PiecewiseRegressor(), + X, Y, binner__max_depth=[2, 3]) self.assertIn('model', res) self.assertIn('score', res) self.assertGreater(res['score'], 0) diff --git a/_unittests/ut_mlmodel/test_quantile_mlpregression.py b/_unittests/ut_mlmodel/test_quantile_mlpregression.py index c45a8266..d7a76857 100644 --- a/_unittests/ut_mlmodel/test_quantile_mlpregression.py +++ b/_unittests/ut_mlmodel/test_quantile_mlpregression.py @@ -11,7 +11,10 @@ from sklearn.exceptions import ConvergenceWarning from pyquickhelper.pycode import ExtTestCase, ignore_warnings from mlinsights.mlmodel import QuantileMLPRegressor -from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv +from mlinsights.mlmodel import ( + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv) class TestQuantileMLPRegression(ExtTestCase): @@ -54,14 +57,14 @@ def test_quantile_regression_pickle(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - test_sklearn_pickle(lambda: MLPRegressor( + run_test_sklearn_pickle(lambda: MLPRegressor( hidden_layer_sizes=(3,)), X, Y) - test_sklearn_pickle(lambda: QuantileMLPRegressor( + run_test_sklearn_pickle(lambda: QuantileMLPRegressor( hidden_layer_sizes=(3,)), X, Y) @ignore_warnings(ConvergenceWarning) def test_quantile_regression_clone(self): - test_sklearn_clone(lambda: QuantileMLPRegressor()) + run_test_sklearn_clone(lambda: QuantileMLPRegressor()) @ignore_warnings(ConvergenceWarning) def test_quantile_regression_grid_search(self): @@ -71,10 +74,11 @@ def test_quantile_regression_grid_search(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: QuantileMLPRegressor(hidden_layer_sizes=(3,)), X, Y), ValueError) - res = test_sklearn_grid_search_cv(lambda: QuantileMLPRegressor(hidden_layer_sizes=(3,)), - X, Y, learning_rate_init=[0.001, 0.0001]) + res = run_test_sklearn_grid_search_cv( + lambda: QuantileMLPRegressor(hidden_layer_sizes=(3,)), + X, Y, learning_rate_init=[0.001, 0.0001]) self.assertIn('model', res) self.assertIn('score', res) self.assertGreater(res['score'], 0) diff --git a/_unittests/ut_mlmodel/test_quantile_regression.py b/_unittests/ut_mlmodel/test_quantile_regression.py index cbbe3d3a..fb80b5a5 100644 --- a/_unittests/ut_mlmodel/test_quantile_regression.py +++ b/_unittests/ut_mlmodel/test_quantile_regression.py @@ -11,7 +11,10 @@ from pyquickhelper.pycode import ExtTestCase from pyquickhelper.texthelper import compare_module_version from mlinsights.mlmodel import QuantileLinearRegression -from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv +from mlinsights.mlmodel import ( + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv) from mlinsights.mlmodel.quantile_mlpregressor import float_sign @@ -161,11 +164,11 @@ def test_quantile_regression_pickle(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - test_sklearn_pickle(lambda: LinearRegression(), X, Y) - test_sklearn_pickle(lambda: QuantileLinearRegression(), X, Y) + run_test_sklearn_pickle(lambda: LinearRegression(), X, Y) + run_test_sklearn_pickle(lambda: QuantileLinearRegression(), X, Y) def test_quantile_regression_clone(self): - test_sklearn_clone(lambda: QuantileLinearRegression(delta=0.001)) + run_test_sklearn_clone(lambda: QuantileLinearRegression(delta=0.001)) def test_quantile_regression_grid_search(self): X = random(100) @@ -174,11 +177,11 @@ def test_quantile_regression_grid_search(self): eps = numpy.hstack([eps1, eps2]) X = X.reshape((100, 1)) # pylint: disable=E1101 Y = X.ravel() * 3.4 + 5.6 + eps - self.assertRaise(lambda: test_sklearn_grid_search_cv( + self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: QuantileLinearRegression(), X, Y), (ValueError, TypeError)) - res = test_sklearn_grid_search_cv(lambda: QuantileLinearRegression(), - X, Y, delta=[0.1, 0.001]) + res = run_test_sklearn_grid_search_cv(lambda: QuantileLinearRegression(), + X, Y, delta=[0.1, 0.001]) self.assertIn('model', res) self.assertIn('score', res) self.assertGreater(res['score'], 0) diff --git a/_unittests/ut_mlmodel/test_transfer_transformer.py b/_unittests/ut_mlmodel/test_transfer_transformer.py index 81a97179..0ecec14f 100644 --- a/_unittests/ut_mlmodel/test_transfer_transformer.py +++ b/_unittests/ut_mlmodel/test_transfer_transformer.py @@ -9,7 +9,7 @@ from sklearn.pipeline import make_pipeline, Pipeline from pyquickhelper.pycode import ExtTestCase from mlinsights.mlmodel import TransferTransformer -from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone +from mlinsights.mlmodel import run_test_sklearn_pickle, run_test_sklearn_clone class TestTransferTransformer(ExtTestCase): @@ -108,14 +108,14 @@ def test_transfer_transformer_cloned0(self): norm.fit(X) tr1 = TransferTransformer(norm, copy_estimator=True) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=False) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=False) tr1.fit(X) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=False) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=False) tr1 = TransferTransformer(norm, copy_estimator=True) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) tr1.fit(X) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) def test_transfer_transformer_pickle(self): @@ -131,7 +131,7 @@ def test_transfer_transformer_pickle(self): pipe = make_pipeline(TransferTransformer(norm), TransferTransformer(clr)) pipe.fit(X) - test_sklearn_pickle(lambda: pipe, X, Y) + run_test_sklearn_pickle(lambda: pipe, X, Y) def test_transfer_transformer_clone(self): @@ -145,14 +145,14 @@ def test_transfer_transformer_clone(self): clr.fit(X2, Y) tr1 = TransferTransformer(norm, copy_estimator=False) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) tr1.fit(X) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) tr1 = TransferTransformer(norm, copy_estimator=True) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) tr1.fit(X) - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) tr1 = TransferTransformer(norm, copy_estimator=True) tr2 = TransferTransformer(clr, copy_estimator=True) @@ -160,9 +160,9 @@ def test_transfer_transformer_clone(self): pipe.fit(X) self.maxDiff = None - test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) - test_sklearn_clone(lambda: tr2, ext=self, copy_fitted=True) - test_sklearn_clone(lambda: pipe, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr1, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: tr2, ext=self, copy_fitted=True) + run_test_sklearn_clone(lambda: pipe, ext=self, copy_fitted=True) if __name__ == "__main__": diff --git a/_unittests/ut_mlmodel/test_tsne_predictable.py b/_unittests/ut_mlmodel/test_tsne_predictable.py index 39692b0c..ee7dfd4a 100644 --- a/_unittests/ut_mlmodel/test_tsne_predictable.py +++ b/_unittests/ut_mlmodel/test_tsne_predictable.py @@ -14,7 +14,7 @@ from pyquickhelper.pycode import ( ExtTestCase, skipif_circleci, ignore_warnings) from mlinsights.mlmodel import PredictableTSNE -from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone +from mlinsights.mlmodel import run_test_sklearn_pickle, run_test_sklearn_clone class TestPredictableTSNE(ExtTestCase): @@ -57,12 +57,12 @@ def test_predictable_tsne_intercept_weights(self): def test_predictable_tsne_pickle(self): iris = datasets.load_iris() X, y = iris.data[:20], iris.target[:20] - test_sklearn_pickle(lambda: PredictableTSNE(), X, y) + run_test_sklearn_pickle(lambda: PredictableTSNE(), X, y) @ignore_warnings(ConvergenceWarning) def test_predictable_tsne_clone(self): self.maxDiff = None - test_sklearn_clone(lambda: PredictableTSNE()) + run_test_sklearn_clone(lambda: PredictableTSNE()) @ignore_warnings(ConvergenceWarning) def test_predictable_tsne_relevance(self): @@ -79,13 +79,13 @@ def test_predictable_tsne_relevance(self): Ys.extend([cl for i in range(n)]) X = numpy.vstack(Xs) Y = numpy.array(Ys) - clk = PredictableTSNE(transformer=TSNE(n_components=3), + clk = PredictableTSNE(transformer=TSNE(n_components=2), normalizer=StandardScaler(with_mean=False), keep_tsne_outputs=True) clk.fit(X, Y) pred = clk.transform(X) self.assertGreater(clk.loss_, 0) - self.assertEqual(pred.shape, (X.shape[0], 3)) + self.assertEqual(pred.shape, (X.shape[0], 2)) if __name__ == "__main__": diff --git a/mlinsights/mlmodel/__init__.py b/mlinsights/mlmodel/__init__.py index df30feb0..1556b621 100644 --- a/mlinsights/mlmodel/__init__.py +++ b/mlinsights/mlmodel/__init__.py @@ -16,8 +16,14 @@ from .predictable_tsne import PredictableTSNE from .quantile_mlpregressor import QuantileMLPRegressor from .quantile_regression import QuantileLinearRegression -from .sklearn_testing import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv +from .sklearn_testing import ( + run_test_sklearn_pickle, + run_test_sklearn_clone, + run_test_sklearn_grid_search_cv) from .sklearn_text import TraceableTfidfVectorizer, TraceableCountVectorizer -from .sklearn_transform_inv_fct import FunctionReciprocalTransformer, PermutationReciprocalTransformer -from .target_predictors import TransformedTargetClassifier2, TransformedTargetRegressor2 +from .sklearn_transform_inv_fct import ( + FunctionReciprocalTransformer, + PermutationReciprocalTransformer) +from .target_predictors import ( + TransformedTargetClassifier2, TransformedTargetRegressor2) from .transfer_transformer import TransferTransformer diff --git a/mlinsights/mlmodel/extended_features.py b/mlinsights/mlmodel/extended_features.py index 66224dc9..6e20c6ab 100644 --- a/mlinsights/mlmodel/extended_features.py +++ b/mlinsights/mlmodel/extended_features.py @@ -46,7 +46,7 @@ def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False, self.poly_include_bias = poly_include_bias self.poly_interaction_only = poly_interaction_only - def get_feature_names(self, input_features=None): + def get_feature_names_out(self, input_features=None): """ Returns feature names for output features. @@ -118,7 +118,7 @@ def fit(self, X, y=None): :return: self : instance """ self.n_input_features_ = X.shape[1] - self.n_output_features_ = len(self.get_feature_names()) + self.n_output_features_ = len(self.get_feature_names_out()) if self.kind == 'poly': return self._fit_poly(X, y) diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx index 3c76b92b..760af0c6 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx @@ -63,7 +63,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): # Criterion interface self.sample_weight = None - self.samples_indices = None + self.sample_indices = None # allocation if self.sample_w == NULL: @@ -93,7 +93,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight, double weighted_n_samples, - const SIZE_t[:] samples_indices, + const SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end) nogil except -1: """ Initializes the criterion. @@ -127,7 +127,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion): # Filling accumulators. for ki in range(start, end): - ks = samples_indices[ki] + ks = sample_indices[ki] self.sample_i[ki] = ks self.sample_w[ki] = sample_weight[ks] if sample_weight is not None else 1. self.sample_wy[ki] = self.sample_w[ki] * y[ks, 0] diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx index 7aa519fc..9d37fd42 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx @@ -56,7 +56,7 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): # Criterion interface self.sample_weight = None - self.samples_indices = None + self.sample_indices = None # allocations if self.sample_w_left == NULL: @@ -86,7 +86,7 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight, double weighted_n_samples, - const SIZE_t[:] samples_indices, + const SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end) nogil except -1: """ Initializes the criterion. @@ -124,14 +124,14 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): # Left side. for ki in range(start, start+1): - ks = samples_indices[ki] + ks = sample_indices[ki] w = sample_weight[ks] if sample_weight is not None else 1. y_ = y[ks, 0] self.sample_w_left[ki] = w self.sample_wy_left[ki] = w * y_ self.sample_wy2_left[ki] = w * y_ * y_ for ki in range(start+1, end): - ks = samples_indices[ki] + ks = sample_indices[ki] w = sample_weight[ks] if sample_weight is not None else 1. y_ = y[ks, 0] self.sample_w_left[ki] = self.sample_w_left[ki-1] + w diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx index 3dcc491d..c823603a 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx @@ -91,7 +91,7 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): # Criterion interface self.sample_weight = None - self.samples_indices = None + self.sample_indices = None # allocation if self.sample_w == NULL: @@ -138,21 +138,20 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): :return: an instance of :class:`LinearRegressorCriterion` """ cdef SIZE_t i - cdef DOUBLE_t* ws + cdef DOUBLE_t[:] ws cdef double sum - cdef SIZE_t* parr = calloc(y.shape[0], sizeof(SIZE_t)) + cdef SIZE_t[:] parr = numpy.empty(y.shape[0], dtype=numpy.int64) for i in range(0, y.shape[0]): parr[i] = i if sample_weight is None: sum = y.shape[0] - ws = NULL + ws = None else: sum = sample_weight.sum() - ws = &sample_weight[0] + ws = sample_weight obj = LinearRegressorCriterion(1 if len(y.shape) <= 1 else y.shape[0], X) obj.init(y, ws, sum, parr, 0, y.shape[0]) - free(parr) return obj cdef int init(self, const DOUBLE_t[:, ::1] y, @@ -178,7 +177,7 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight, double weighted_n_samples, - const SIZE_t[:] samples, + const SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end) nogil except -1: """ Initializes the criterion. @@ -212,7 +211,7 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): # Filling accumulators. idx = start * self.nbvar for ki in range(start, end): - ks = samples_indices[ki] + ks = sample_indices[ki] self.sample_i[ki] = ks self.sample_w[ki] = sample_weight[ks] if sample_weight is not None else 1. self.sample_wy[ki] = self.sample_w[ki] * y[ks, 0] diff --git a/mlinsights/mlmodel/sklearn_testing.py b/mlinsights/mlmodel/sklearn_testing.py index 359e7bee..09c6cf48 100644 --- a/mlinsights/mlmodel/sklearn_testing.py +++ b/mlinsights/mlmodel/sklearn_testing.py @@ -43,7 +43,7 @@ def train_test_split_with_none(X, y=None, sample_weight=None, random_state=0): return X_train, y_train, w_train, X_test, y_test, w_test -def test_sklearn_pickle(fct_model, X, y=None, sample_weight=None, **kwargs): +def run_test_sklearn_pickle(fct_model, X, y=None, sample_weight=None, **kwargs): """ Creates a model, fit, predict and check the prediction are similar after the model was pickled, unpickled. @@ -108,7 +108,7 @@ def assertIsInstance(self, inst, cltype): return cls() -def test_sklearn_clone(fct_model, ext=None, copy_fitted=False): +def run_test_sklearn_clone(fct_model, ext=None, copy_fitted=False): """ Tests that a cloned model is similar to the original one. @@ -233,7 +233,7 @@ def assert_estimator_equal(esta, estb, ext=None): list(sorted(esta.__dict__)), list(sorted(estb.__dict__)))) -def test_sklearn_grid_search_cv(fct_model, X, y=None, sample_weight=None, **grid_params): +def run_test_sklearn_grid_search_cv(fct_model, X, y=None, sample_weight=None, **grid_params): """ Creates a model, checks that a grid search works with it. From e42687839f56d5e55e72adc75b4ec8ef60a90187 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 8 Jan 2023 11:59:54 +0100 Subject: [PATCH 03/17] fix compilation issues --- mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx | 4 ++-- .../mlmodel/piecewise_tree_regression_criterion_fast.pyx | 4 ++-- .../mlmodel/piecewise_tree_regression_criterion_linear.pyx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx b/mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx index 80b2064f..d3d4af7a 100644 --- a/mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx +++ b/mlinsights/mlmodel/_piecewise_tree_regression_common120.pyx @@ -225,12 +225,12 @@ def _test_criterion_init(Criterion criterion, const DOUBLE_t[:, ::1] y, DOUBLE_t[:] sample_weight, double weighted_n_samples, - SIZE_t[:] samples, + SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end): "Test purposes. Methods cannot be directly called from python." criterion.init(y, sample_weight, weighted_n_samples, - samples, start, end) + sample_indices, start, end) def _test_criterion_check(Criterion criterion): diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx index 9d37fd42..7773c6c7 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion_fast.pyx @@ -69,7 +69,7 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): cdef int init(self, const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight, double weighted_n_samples, - const SIZE_t[:] samples, + const SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end) nogil except -1: """ This function is overwritten to check *y* and *X* size are the same. @@ -80,7 +80,7 @@ cdef class SimpleRegressorCriterionFast(CommonRegressorCriterion): if y.shape[1] != 1: raise ValueError("This class only works for a single vector.") return self.init_with_X(y, sample_weight, weighted_n_samples, - samples, start, end) + sample_indices, start, end) cdef int init_with_X(self, const DOUBLE_t[:, ::1] y, diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx index c823603a..d468644a 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx +++ b/mlinsights/mlmodel/piecewise_tree_regression_criterion_linear.pyx @@ -157,7 +157,7 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): cdef int init(self, const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight, double weighted_n_samples, - const SIZE_t[:] samples, + const SIZE_t[:] sample_indices, SIZE_t start, SIZE_t end) nogil except -1: """ This function is overwritten to check *y* and *X* size are the same. @@ -171,7 +171,7 @@ cdef class LinearRegressorCriterion(CommonRegressorCriterion): if y.shape[1] != 1: raise ValueError("This class only works for a single vector.") return self.init_with_X(self.sample_X, y, sample_weight, weighted_n_samples, - samples, start, end) + sample_indices, start, end) cdef int init_with_X(self, const DOUBLE_t[:, ::1] X, const DOUBLE_t[:, ::1] y, From 73a88e51250bef5ed01d6a6a038a753d5cde2fa5 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 8 Jan 2023 13:05:31 +0100 Subject: [PATCH 04/17] switch to development version of scikit-learn --- requirements.txt | 2 +- setup.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 76903d60..ae6825f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ pydata-sphinx-theme pyquickhelper>=1.10 pyquicksetup pylint>=2.14.0 -scikit-learn>=1.2 +git+https://github.com/scikit-learn/scikit-learn.git scipy seaborn skl2onnx diff --git a/setup.py b/setup.py index 71333521..53147c44 100644 --- a/setup.py +++ b/setup.py @@ -55,11 +55,11 @@ def get_extensions(): extensions = ["direct_blas_lapack"] spl = sklearn.__version__.split('.') vskl = (int(spl[0]), int(spl[1])) - if vskl >= (1, 2): + if vskl > (1, 2): extensions.append(("_piecewise_tree_regression_common", "_piecewise_tree_regression_common120")) else: - raise ImportError("Cannot build mlisinghts for scikit-learn<1.0.") + raise ImportError("Cannot build mlisinghts for scikit-learn<=1.2.") extensions.extend([ "piecewise_tree_regression_criterion", @@ -122,7 +122,7 @@ def get_extensions(): package_dir=package_dir, package_data=package_data, setup_requires=["pyquicksetup", 'cython', 'scipy', 'scikit-learn'], - install_requires=['cython', 'scikit-learn>=1.2', 'pandas', 'scipy', + install_requires=['cython', 'scikit-learn>1.2', 'pandas', 'scipy', 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], ext_modules=ext_modules, # cythonize(ext_modules), ) From cf4b29ea38ff1adab27ecbc1582ce0d9906cec72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 8 Jan 2023 14:03:08 +0100 Subject: [PATCH 05/17] remove normalize, update to the latest API --- .gitignore | 1 + _unittests/ut_sklapi/test_sklearn_convert.py | 12 ++++-------- _unittests/ut_sklapi/test_sklearn_stacking.py | 8 ++++---- mlinsights/mltree/tree_structure.py | 4 ++-- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 6703d4c3..69593268 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,7 @@ build/ *.log *.scc *.so +*.pyd # Visual C++ cache files ipch/ diff --git a/_unittests/ut_sklapi/test_sklearn_convert.py b/_unittests/ut_sklapi/test_sklearn_convert.py index 03bb7679..df4d5641 100644 --- a/_unittests/ut_sklapi/test_sklearn_convert.py +++ b/_unittests/ut_sklapi/test_sklearn_convert.py @@ -107,19 +107,15 @@ def test_pipeline_with_two_regressors(self): @ignore_warnings(ConvergenceWarning) def test_pipeline_with_params(self): - conv = SkBaseTransformLearner(LinearRegression(normalize=True)) + conv = SkBaseTransformLearner(LinearRegression()) pipe = make_pipeline(conv, DecisionTreeRegressor()) pars = pipe.get_params() self.assertIn('skbasetransformlearner__model__fit_intercept', pars) - self.assertEqual( - pars['skbasetransformlearner__model__normalize'], True) - conv = SkBaseTransformLearner(LinearRegression(normalize=True)) + conv = SkBaseTransformLearner(LinearRegression(fit_intercept=True)) pipe = make_pipeline(conv, DecisionTreeRegressor()) pipe.set_params(**pars) pars = pipe.get_params() self.assertIn('skbasetransformlearner__model__fit_intercept', pars) - self.assertEqual( - pars['skbasetransformlearner__model__normalize'], True) @ignore_warnings(ConvergenceWarning) def test_pickle(self): @@ -129,7 +125,7 @@ def test_pickle(self): X2=[0.5, 0.6, 0.7, 0.5, 1.5, 1.6, 1.7, 1.8])) X = df.drop('y', axis=1) y = df['y'] - model = SkBaseTransformLearner(LinearRegression(normalize=True)) + model = SkBaseTransformLearner(LinearRegression()) model.fit(X, y) pred = model.transform(X) @@ -149,7 +145,7 @@ def test_grid(self): X2=[0.5, 0.6, 0.7, 0.5, 1.5, 1.6, 1.7, 1.8])) X = df.drop('y', axis=1) y = df['y'] - model = make_pipeline(SkBaseTransformLearner(LinearRegression(normalize=True)), + model = make_pipeline(SkBaseTransformLearner(LinearRegression()), LogisticRegression()) res = model.get_params(True) self.assertGreater(len(res), 0) diff --git a/_unittests/ut_sklapi/test_sklearn_stacking.py b/_unittests/ut_sklapi/test_sklearn_stacking.py index 01b6cfb8..a648237d 100644 --- a/_unittests/ut_sklapi/test_sklearn_stacking.py +++ b/_unittests/ut_sklapi/test_sklearn_stacking.py @@ -88,7 +88,7 @@ def test_pipeline_with_two_transforms(self): @ignore_warnings(ConvergenceWarning) def test_pipeline_with_params(self): - conv = SkBaseTransformStacking([LinearRegression(normalize=True), + conv = SkBaseTransformStacking([LinearRegression(), DecisionTreeClassifier(max_depth=3)]) pipe = make_pipeline(conv, DecisionTreeRegressor()) pars = pipe.get_params(deep=True) @@ -111,7 +111,7 @@ def test_pickle(self): data = load_iris() X, y = data.data, data.target # X_train, X_test, y_train, y_test = train_test_split(X, y) - conv = SkBaseTransformStacking([LinearRegression(normalize=True), + conv = SkBaseTransformStacking([LinearRegression(), DecisionTreeClassifier(max_depth=3)]) model = make_pipeline(conv, DecisionTreeRegressor()) model.fit(X, y) @@ -127,7 +127,7 @@ def test_pickle(self): @ignore_warnings(ConvergenceWarning) def test_clone(self): - conv = SkBaseTransformStacking([LinearRegression(normalize=True), + conv = SkBaseTransformStacking([LinearRegression(), DecisionTreeClassifier(max_depth=3)], 'predict') cloned = clone(conv) @@ -138,7 +138,7 @@ def test_grid(self): data = load_iris() X, y = data.data, data.target # X_train, X_test, y_train, y_test = train_test_split(X, y) - conv = SkBaseTransformStacking([LinearRegression(normalize=True), + conv = SkBaseTransformStacking([LinearRegression(), DecisionTreeClassifier(max_depth=3)]) model = make_pipeline(conv, DecisionTreeRegressor()) diff --git a/mlinsights/mltree/tree_structure.py b/mlinsights/mltree/tree_structure.py index 88faeb47..12dcce79 100644 --- a/mlinsights/mltree/tree_structure.py +++ b/mlinsights/mltree/tree_structure.py @@ -249,7 +249,7 @@ def tree_leave_neighbors(model): cells = numpy.full(shape, 0, numpy.int32) while pos[0] < len(features[keys[0]]) - 1: # evaluate - xy = numpy.zeros((1, model.n_features_)) + xy = numpy.zeros((1, model.n_features_in_)) for p, k in zip(pos, keys): xy[0, k] = (features[k][p] + features[k][p + 1]) / 2 leave = predict_leaves(model, xy) @@ -287,7 +287,7 @@ def tree_leave_neighbors(model): edge = (cl, cl2) if cl < cl2 else (cl2, cl) if edge not in neighbors: neighbors[edge] = [] - xy = numpy.zeros((model.n_features_)) + xy = numpy.zeros((model.n_features_in_)) for p, f in zip(pos, keys): xy[f] = (features[f][p] + features[f][p + 1]) / 2 x2 = tuple(xy) From 8c46e6179af1105c30dfc4078413ba6c64e45e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 8 Jan 2023 14:03:26 +0100 Subject: [PATCH 06/17] Update test_sklearn_stacking.py --- _unittests/ut_sklapi/test_sklearn_stacking.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/_unittests/ut_sklapi/test_sklearn_stacking.py b/_unittests/ut_sklapi/test_sklearn_stacking.py index a648237d..7e3ea64d 100644 --- a/_unittests/ut_sklapi/test_sklearn_stacking.py +++ b/_unittests/ut_sklapi/test_sklearn_stacking.py @@ -94,8 +94,6 @@ def test_pipeline_with_params(self): pars = pipe.get_params(deep=True) self.assertIn( 'skbasetransformstacking__models_0__model__fit_intercept', pars) - self.assertEqual( - pars['skbasetransformstacking__models_0__model__normalize'], True) conv = SkBaseTransformStacking([LinearRegression(normalize=False), DecisionTreeClassifier(max_depth=2)]) pipe = make_pipeline(conv, DecisionTreeRegressor()) @@ -103,8 +101,6 @@ def test_pipeline_with_params(self): pars = pipe.get_params() self.assertIn( 'skbasetransformstacking__models_0__model__fit_intercept', pars) - self.assertEqual( - pars['skbasetransformstacking__models_0__model__normalize'], True) @ignore_warnings(ConvergenceWarning) def test_pickle(self): From 1316fa1078180485ed5da1b626327cc3047d17d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 8 Jan 2023 23:09:04 +0100 Subject: [PATCH 07/17] Update test_sklearn_stacking.py --- _unittests/ut_sklapi/test_sklearn_stacking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_unittests/ut_sklapi/test_sklearn_stacking.py b/_unittests/ut_sklapi/test_sklearn_stacking.py index 7e3ea64d..0800268a 100644 --- a/_unittests/ut_sklapi/test_sklearn_stacking.py +++ b/_unittests/ut_sklapi/test_sklearn_stacking.py @@ -94,7 +94,7 @@ def test_pipeline_with_params(self): pars = pipe.get_params(deep=True) self.assertIn( 'skbasetransformstacking__models_0__model__fit_intercept', pars) - conv = SkBaseTransformStacking([LinearRegression(normalize=False), + conv = SkBaseTransformStacking([LinearRegression(), DecisionTreeClassifier(max_depth=2)]) pipe = make_pipeline(conv, DecisionTreeRegressor()) pipe.set_params(**pars) From b12859aab4754c3faab512f1739cfe1f92dec639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 8 Jan 2023 23:57:11 +0100 Subject: [PATCH 08/17] Update test_piecewise_classifier.py --- _unittests/ut_mlmodel/test_piecewise_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_unittests/ut_mlmodel/test_piecewise_classifier.py b/_unittests/ut_mlmodel/test_piecewise_classifier.py index 0d699d1e..9f43bcb3 100644 --- a/_unittests/ut_mlmodel/test_piecewise_classifier.py +++ b/_unittests/ut_mlmodel/test_piecewise_classifier.py @@ -176,8 +176,8 @@ def test_piecewise_classifier_grid_search(self): X = X.reshape((100, 1)) # pylint: disable=E1101 self.assertRaise(lambda: run_test_sklearn_grid_search_cv( lambda: PiecewiseClassifier(), X, Y), ValueError) - res = run_test_sklearn_grid_search_cv(lambda: PiecewiseClassifier(), - X, Y, binner__max_depth=[2, 3]) + res = run_test_sklearn_grid_search_cv( + lambda: PiecewiseClassifier(), X, Y, binner__max_depth=[2, 3]) self.assertIn('model', res) self.assertIn('score', res) self.assertGreater(res['score'], 0) From e8d34fad61f60a1f73a409fd104101b03d977018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 4 Feb 2023 02:18:16 +0100 Subject: [PATCH 09/17] update requirements --- requirements-win.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-win.txt b/requirements-win.txt index b04b2c7e..21a75ae0 100644 --- a/requirements-win.txt +++ b/requirements-win.txt @@ -13,5 +13,5 @@ psutil pylint>=2.14.0 pymyinstall pyshp -scikit-learn>=1.2 +scikit-learn>=1.2.1 threadpoolctl diff --git a/requirements.txt b/requirements.txt index ae6825f3..049bef1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ pydata-sphinx-theme pyquickhelper>=1.10 pyquicksetup pylint>=2.14.0 -git+https://github.com/scikit-learn/scikit-learn.git +scikit-learn>=1.2.1 scipy seaborn skl2onnx From 4d955b3ac92ce59d4401bc876214f84b0f48a9fb Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 15:24:43 +0200 Subject: [PATCH 10/17] fix for scikit-learn==1.3.0 --- .local.jenkins.lin.yml | 1 - appveyor.yml | 1 - azure-pipelines.yml | 13 +- .../_piecewise_tree_regression_common.pyx | 308 ++++++++++++++++++ mlinsights/mltree/_tree_digitize.pyx | 14 +- requirements-win.txt | 17 - requirements.txt | 2 +- setup.py | 4 +- 8 files changed, 323 insertions(+), 37 deletions(-) create mode 100644 mlinsights/mlmodel/_piecewise_tree_regression_common.pyx delete mode 100644 requirements-win.txt diff --git a/.local.jenkins.lin.yml b/.local.jenkins.lin.yml index 5a88a30a..403b9d91 100644 --- a/.local.jenkins.lin.yml +++ b/.local.jenkins.lin.yml @@ -11,7 +11,6 @@ install: - $PYINT -m pip install --upgrade pip - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper cpyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/ - $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ scikit-learn>=0.22 --extra-index-url=https://pypi.python.org/simple/ - - $PYINT -m pip install -r requirements-win.txt - $PYINT -m pip install -r requirements.txt - $PYINT --version - $PYINT -m pip freeze diff --git a/appveyor.yml b/appveyor.yml index 20257516..641fc25d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,7 +13,6 @@ install: - "%PYTHON%\\python -m pip install --upgrade pip" # for many packages - "%PYTHON%\\Scripts\\pip install llvmlite numba" - - "%PYTHON%\\Scripts\\pip install -r requirements-win.txt" # install precompiled versions not available on pypi - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" # other dependencies diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d29610d7..5dfac303 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -51,16 +51,9 @@ jobs: python -m pip install cibuildwheel export CIBW_MANYLINUX_X86_64_IMAGE="manylinux_2_24" export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming" - export CIBW_BUILD="cp39-manylinux_x86_64 cp310-manylinux_x86_64" + export CIBW_BUILD="cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64" python -m cibuildwheel --output-dir dist/wheelhouse_2 --platform linux displayName: 'Build Package manylinux_x_y' - - script: | - python -m pip install cibuildwheel - export CIBW_MANYLINUX_X86_64_IMAGE="manylinux2014" - export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming" - export CIBW_BUILD="cp37-manylinux_x86_64 cp38-manylinux_x86_64 cp39-manylinux_x86_64" - python -m cibuildwheel --output-dir dist/wheelhouse --platform linux - displayName: 'Build Package manylinux2014' - task: PublishPipelineArtifact@0 inputs: artifactName: 'wheel-linux-$(python.version)' @@ -92,7 +85,7 @@ jobs: - script: | python -m pip install cibuildwheel set CIBW_BEFORE_BUILD=pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming - set CIBW_BUILD=cp37-win_amd64 cp38-win_amd64 cp39-win_amd64 cp310-win_amd64 + set CIBW_BUILD=cp39-win_amd64 cp310-win_amd64 cp311-win_amd64 python -m cibuildwheel --output-dir dist/wheelhouse displayName: 'Build Package many' - task: PublishPipelineArtifact@0 @@ -159,7 +152,7 @@ jobs: - script: | python -m pip install cibuildwheel export CIBW_BEFORE_BUILD="pip install pybind11 cython numpy scipy pyquickhelper scikit-learn pandas pandas_streaming" - export CIBW_BUILD="cp37-macosx_x86_64 cp38-macosx_x86_64 cp39-macosx_x86_64 cp310-macosx_x86_64" + export CIBW_BUILD="cp39-macosx_x86_64 cp310-macosx_x86_64 cp311-macosx_x86_64" python -m cibuildwheel --output-dir dist/wheelhouse displayName: 'Build Package many' - task: PublishPipelineArtifact@0 diff --git a/mlinsights/mlmodel/_piecewise_tree_regression_common.pyx b/mlinsights/mlmodel/_piecewise_tree_regression_common.pyx new file mode 100644 index 00000000..28bd4b32 --- /dev/null +++ b/mlinsights/mlmodel/_piecewise_tree_regression_common.pyx @@ -0,0 +1,308 @@ +""" +@file +@brief Implements a custom criterion to train a decision tree. +""" +from libc.stdlib cimport calloc, free +from libc.stdio cimport printf +from libc.math cimport NAN + +import numpy +cimport numpy +numpy.import_array() + +from sklearn.tree._criterion cimport Criterion +from sklearn.tree._criterion cimport SIZE_t, DOUBLE_t + + +cdef class CommonRegressorCriterion(Criterion): + """ + Common class to implement various version of `mean square error + `_. + The code was inspired from + `hellinger_distance_criterion.pyx + `_, + `Cython example of exposing C-computed arrays in Python without data copies + `_, + `_criterion.pyx + `_. + This implementation is not efficient but was made that way on purpose. + It adds the features to the class. + + If the file does not compile, some explanations are given + in :ref:`scikit-learn internal API + `_. + """ + def __getstate__(self): + return {} + + def __setstate__(self, d): + pass + + def __deepcopy__(self, memo=None): + """ + This does not a copy but mostly creates a new instance + of the same criterion initialized with the same data. + """ + inst = self.__class__(self.n_outputs, self.n_samples) + return inst + + cdef void _update_weights(self, SIZE_t start, SIZE_t end, SIZE_t old_pos, SIZE_t new_pos) nogil: + """ + Updates members `weighted_n_right` and `weighted_n_left` + when `pos` changes. This method should be overloaded. + """ + pass + + cdef int reset(self) nogil except -1: + """ + Resets the criterion at *pos=start*. + This method must be implemented by the subclass. + """ + self._update_weights(self.start, self.end, self.pos, self.start) + self.pos = self.start + + cdef int reverse_reset(self) nogil except -1: + """ + Resets the criterion at *pos=end*. + This method must be implemented by the subclass. + """ + self._update_weights(self.start, self.end, self.pos, self.end) + self.pos = self.end + + cdef int update(self, SIZE_t new_pos) nogil except -1: + """ + Updates statistics by moving ``samples[pos:new_pos]`` to the left child. + This updates the collected statistics by moving ``samples[pos:new_pos]`` + from the right child to the left child. It must be implemented by + the subclass. + + :param new_pos: SIZE_t + New starting index position of the samples in the right child + """ + self._update_weights(self.start, self.end, self.pos, new_pos) + self.pos = new_pos + + cdef void _mean(self, SIZE_t start, SIZE_t end, DOUBLE_t *mean, DOUBLE_t *weight) nogil: + """ + Computes the mean of *y* between *start* and *end*. + """ + raise NotImplementedError("Method _mean must be overloaded.") + + cdef double _mse(self, SIZE_t start, SIZE_t end, DOUBLE_t mean, DOUBLE_t weight) nogil: + """ + Computes mean square error between *start* and *end* + assuming corresponding points are approximated by a constant. + """ + raise NotImplementedError("Method _mean must be overloaded.") + + cdef void children_impurity_weights(self, double* impurity_left, + double* impurity_right, + double* weight_left, + double* weight_right) nogil: + """ + Calculates the impurity of children, + evaluates the impurity in + children nodes, i.e. the impurity of ``samples[start:pos]`` + the impurity of ``samples[pos:end]``. + + :param impurity_left: double pointer + The memory address where the impurity of the left child should be + stored. + :param impurity_right: double pointer + The memory address where the impurity of the right child should be + stored. + :param weight_left: double pointer + The memory address where the weight of the left child should be + stored. + :param weight_right: double pointer + The memory address where the weight of the right child should be + stored. + """ + cdef DOUBLE_t mleft, mright + self._mean(self.start, self.pos, &mleft, weight_left) + self._mean(self.pos, self.end, &mright, weight_right) + impurity_left[0] = self._mse(self.start, self.pos, mleft, weight_left[0]) + impurity_right[0] = self._mse(self.pos, self.end, mright, weight_right[0]) + + #################### + # functions used by a the tree optimizer + #################### + + cdef double node_impurity(self) nogil: + """ + Calculates the impurity of the node, + the impurity of ``samples[start:end]``. + This is the primary function of the criterion class. + """ + cdef DOUBLE_t mean, weight + self._mean(self.start, self.end, &mean, &weight) + return self._mse(self.start, self.end, mean, weight) + + cdef void children_impurity(self, double* impurity_left, + double* impurity_right) nogil: + """ + Calculates the impurity of children. + + :param impurity_left: double pointer + The memory address where the impurity of the left child should be + stored. + :param impurity_right: double pointer + The memory address where the impurity of the right child should be + stored. + """ + cdef DOUBLE_t wl, wr + self.children_impurity_weights(impurity_left, impurity_right, &wl, &wr) + + cdef void node_value(self, double* dest) nogil: + """ + Computes the node value, usually, the prediction + the tree would do. Stores the value into *dest*. + + :param dest: double pointer + The memory address where the node value should be stored. + """ + cdef DOUBLE_t weight + self._mean(self.start, self.end, dest, &weight) + + cdef double proxy_impurity_improvement(self) nogil: + """ + Computes a proxy of the impurity reduction + This method is used to speed up the search for the best split. + It is a proxy quantity such that the split that maximizes this value + also maximizes the impurity improvement. It neglects all constant terms + of the impurity decrease for a given split. + The absolute impurity improvement is only computed by the + *impurity_improvement* method once the best split has been found. + """ + cdef double impurity_left + cdef double impurity_right + self.children_impurity_weights(&impurity_left, &impurity_right, + &self.weighted_n_left, &self.weighted_n_right) + if self.pos == self.start or self.pos == self.end: + return NAN + + return (- self.weighted_n_right * impurity_right + - self.weighted_n_left * impurity_left) + + cdef double impurity_improvement(self, double impurity_parent, + double impurity_left, + double impurity_right) nogil: + """ + Computes the improvement in impurity + This method computes the improvement in impurity when a split occurs. + The weighted impurity improvement equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where *N* is the total number of samples, *N_t* is the number of samples + at the current node, *N_t_L* is the number of samples in the left child, + and *N_t_R* is the number of samples in the right child, + + :param impurity_parent: double + The initial impurity of the node before the split + :param impurity_left: double + The impurity of the left child + :param impurity_right: double + The impurity of the right child + :return: double, improvement in impurity after the split occurs + """ + # self.children_impurity_weights(&impurity_left, &impurity_right, + # &self.weighted_n_left, &self.weighted_n_right) + # if self.pos == self.start or self.pos == self.end: + # return NAN + + # cdef double weight = self.weighted_n_left + self.weighted_n_right + cdef double weight = self.weighted_n_node_samples + return ((weight / self.weighted_n_samples) * + (impurity_parent - (self.weighted_n_right / weight * impurity_right) + - (self.weighted_n_left / weight * impurity_left))) + + +def _test_criterion_init(Criterion criterion, + const DOUBLE_t[:, ::1] y, + DOUBLE_t[:] sample_weight, + double weighted_n_samples, + SIZE_t[:] samples, + SIZE_t start, SIZE_t end): + "Test purposes. Methods cannot be directly called from python." + criterion.init(y, + &sample_weight[0], weighted_n_samples, + &samples[0], start, end) + + +def _test_criterion_check(Criterion criterion): + if criterion.weighted_n_node_samples == 0: + raise ValueError( + "weighted_n_node_samples is null, weighted_n_left=%r, weighted_n_right=%r" % ( + criterion.weighted_n_left, criterion.weighted_n_right)) + + +def assert_criterion_equal(Criterion c1, Criterion c2): + if c1.weighted_n_node_samples != c2.weighted_n_node_samples: + raise ValueError( + "weighted_n_node_samples: %r != %r" % ( + c1.weighted_n_node_samples, c2.weighted_n_node_samples)) + if c1.weighted_n_samples != c2.weighted_n_samples: + raise ValueError( + "weighted_n_samples: %r != %r" % ( + c1.weighted_n_samples, c2.weighted_n_samples)) + if c1.weighted_n_left != c2.weighted_n_left: + raise ValueError( + "weighted_n_left: %r != %r" % ( + c1.weighted_n_left, c2.weighted_n_left)) + if c1.weighted_n_right != c2.weighted_n_right: + raise ValueError( + "weighted_n_right: %r != %r" % ( + c1.weighted_n_right, c2.weighted_n_right)) + + +def _test_criterion_node_impurity(Criterion criterion): + "Test purposes. Methods cannot be directly called from python." + return criterion.node_impurity() + + +def _test_criterion_proxy_impurity_improvement(Criterion criterion): + "Test purposes. Methods cannot be directly called from python." + return criterion.proxy_impurity_improvement() + + +def _test_criterion_impurity_improvement(Criterion criterion, double impurity_parent, + double impurity_left, double impurity_right): + "Test purposes. Methods cannot be directly called from python." + return criterion.impurity_improvement(impurity_parent, impurity_left, impurity_right) + + +def _test_criterion_node_impurity_children(Criterion criterion): + "Test purposes. Methods cannot be directly called from python." + cdef DOUBLE_t left, right + criterion.children_impurity(&left, &right) + return left, right + + +def _test_criterion_node_value(Criterion criterion): + "Test purposes. Methods cannot be directly called from python." + cdef DOUBLE_t value + criterion.node_value(&value) + return value + + +def _test_criterion_update(Criterion criterion, SIZE_t new_pos): + "Test purposes. Methods cannot be directly called from python." + return criterion.update(new_pos) + + +def _test_criterion_printf(Criterion crit): + "Test purposes. Methods cannot be directly called from python." + printf("start=%zu pos=%zu end=%zu\n", crit.start, crit.pos, crit.end) + cdef DOUBLE_t left, right, value + cdef int i; + crit.children_impurity(&left, &right) + crit.node_value(&value) + printf("value: %f total=%f left=%f right=%f\n", value, + crit.node_impurity(), left, right) + cdef int n = crit.y.shape[0] + for i in range(0, n): + printf("-- %d: y=%f\n", i, crit.y[i, 0]) diff --git a/mlinsights/mltree/_tree_digitize.pyx b/mlinsights/mltree/_tree_digitize.pyx index 08610d89..3f43d42a 100644 --- a/mlinsights/mltree/_tree_digitize.pyx +++ b/mlinsights/mltree/_tree_digitize.pyx @@ -24,16 +24,18 @@ cdef SIZE_t _tree_add_node(Tree tree, double threshold, double impurity, SIZE_t n_node_samples, - double weighted_n_node_samples): + double weighted_n_node_samples, + char missing_go_to_left): if parent == -1: parent = TREE_UNDEFINED return tree._add_node(parent, is_left, is_leaf, feature, threshold, impurity, - n_node_samples, weighted_n_node_samples) - + n_node_samples, weighted_n_node_samples, + missing_go_to_left) def tree_add_node(tree, parent, is_left, is_leaf, feature, threshold, - impurity, n_node_samples, weighted_n_node_samples): + impurity, n_node_samples, weighted_n_node_samples, + missing_go_to_left): """ Adds a node to tree. @@ -45,6 +47,8 @@ def tree_add_node(tree, parent, is_left, is_leaf, feature, threshold, :param impurity: impurity :param n_node_samples: number of samples this node represents :param weighted_n_node_samples: node weight + :param missing_go_to_left: whether features have missing values """ return _tree_add_node(tree, parent, is_left, is_leaf, feature, threshold, - impurity, n_node_samples, weighted_n_node_samples) + impurity, n_node_samples, weighted_n_node_samples, + missing_go_to_left) diff --git a/requirements-win.txt b/requirements-win.txt deleted file mode 100644 index 21a75ae0..00000000 --- a/requirements-win.txt +++ /dev/null @@ -1,17 +0,0 @@ -astroid -ijson -importlib_metadata -ipython -isort -joblib -jupyter -matplotlib -nbformat -numpy -pandas -psutil -pylint>=2.14.0 -pymyinstall -pyshp -scikit-learn>=1.2.1 -threadpoolctl diff --git a/requirements.txt b/requirements.txt index 049bef1b..e90d90da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ pydata-sphinx-theme pyquickhelper>=1.10 pyquicksetup pylint>=2.14.0 -scikit-learn>=1.2.1 +scikit-learn>=1.3.0 scipy seaborn skl2onnx diff --git a/setup.py b/setup.py index 53147c44..d37cc204 100644 --- a/setup.py +++ b/setup.py @@ -122,7 +122,7 @@ def get_extensions(): package_dir=package_dir, package_data=package_data, setup_requires=["pyquicksetup", 'cython', 'scipy', 'scikit-learn'], - install_requires=['cython', 'scikit-learn>1.2', 'pandas', 'scipy', - 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], + install_requires=['cython', 'scikit-learn>=1.3', 'pandas', 'scipy', + 'matplotlib', 'pandas_streaming', 'numpy>=1.21'], ext_modules=ext_modules, # cythonize(ext_modules), ) From d553c724c43cf3fb9672aaa561d211b7b7fe21d8 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 15:49:31 +0200 Subject: [PATCH 11/17] fix issue --- azure-pipelines.yml | 2 -- mlinsights/mlmodel/sklearn_transform_inv_fct.py | 2 +- mlinsights/mltree/tree_digitize.py | 16 ++++++---------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5dfac303..8b761af2 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,8 +108,6 @@ jobs: architecture: 'x64' - script: gcc --version displayName: 'gcc version' - - script: brew update - displayName: 'brew update' - script: export displayName: 'export' - script: gcc --version diff --git a/mlinsights/mlmodel/sklearn_transform_inv_fct.py b/mlinsights/mlmodel/sklearn_transform_inv_fct.py index 60fb6f4e..f18b9621 100644 --- a/mlinsights/mlmodel/sklearn_transform_inv_fct.py +++ b/mlinsights/mlmodel/sklearn_transform_inv_fct.py @@ -183,7 +183,7 @@ def transform(self, X, y): if y is None: return X, None self._check_is_fitted() - if len(y.shape) == 1 or y.dtype in (numpy.str, numpy.int32, numpy.int64): + if len(y.shape) == 1 or y.dtype in (numpy.str_, numpy.int32, numpy.int64): # permutes classes yp = y.copy().ravel() num = numpy.issubdtype(y.dtype, numpy.floating) diff --git a/mlinsights/mltree/tree_digitize.py b/mlinsights/mltree/tree_digitize.py index dcc1f03e..58825324 100644 --- a/mlinsights/mltree/tree_digitize.py +++ b/mlinsights/mltree/tree_digitize.py @@ -84,8 +84,7 @@ def add_root(index): is_left = False is_leaf = False threshold = bins[index] - n = tree_add_node( - tree, parent, is_left, is_leaf, 0, threshold, 0, 1, 1.) + n = tree_add_node(tree, parent, is_left, is_leaf, 0, threshold, 0, 1, 1., 0) values.append(UNUSED) n_nodes.append(n) return n @@ -96,7 +95,7 @@ def add_nodes(parent, i, j, is_left): # it means j is the parent split if i == j: # leaf - n = tree_add_node(tree, parent, is_left, True, 0, 0, 0, 1, 1.) + n = tree_add_node(tree, parent, is_left, True, 0, 0, 0, 1, 1., 0) n_nodes.append(n) values.append(i) return n @@ -104,8 +103,7 @@ def add_nodes(parent, i, j, is_left): # split values.append(UNUSED) th = bins[i] - n = tree_add_node(tree, parent, is_left, - False, 0, th, 0, 1, 1.) + n = tree_add_node(tree, parent, is_left, False, 0, th, 0, 1, 1., 0) n_nodes.append(n) add_nodes(n, i, i, True) add_nodes(n, i, j, False) @@ -115,8 +113,7 @@ def add_nodes(parent, i, j, is_left): values.append(UNUSED) index = (i + j) // 2 th = bins[index] - n = tree_add_node(tree, parent, is_left, - False, 0, th, 0, 1, 1.) + n = tree_add_node(tree, parent, is_left, False, 0, th, 0, 1, 1., 0) n_nodes.append(n) add_nodes(n, i, index, True) add_nodes(n, index, j, False) @@ -126,7 +123,7 @@ def add_nodes(parent, i, j, is_left): if i + 1 == j: # leaf values.append(j) - n = tree_add_node(tree, parent, is_left, True, 0, 0, 0, 1, 1.) + n = tree_add_node(tree, parent, is_left, True, 0, 0, 0, 1, 1., 0) n_nodes.append(n) return n if i + 1 < j: @@ -134,8 +131,7 @@ def add_nodes(parent, i, j, is_left): values.append(UNUSED) index = (i + j) // 2 th = bins[index] - n = tree_add_node(tree, parent, is_left, - False, 0, th, 0, 1, 1.) + n = tree_add_node(tree, parent, is_left, False, 0, th, 0, 1, 1., 0) n_nodes.append(n) add_nodes(n, i, index, True) add_nodes(n, index, j, False) From b295cdd35814f2043dec4c74c0743373806ee535 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 16:06:44 +0200 Subject: [PATCH 12/17] disable one warning --- _unittests/ut_module/test_SKIP_code_style.py | 4 ++-- requirements.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/_unittests/ut_module/test_SKIP_code_style.py b/_unittests/ut_module/test_SKIP_code_style.py index 6486b177..2fe2f136 100644 --- a/_unittests/ut_module/test_SKIP_code_style.py +++ b/_unittests/ut_module/test_SKIP_code_style.py @@ -17,7 +17,7 @@ def test_style_src(self): pylint_ignore=('C0103', 'C1801', 'R1705', 'W0108', 'W0613', 'W0201', 'W0221', 'E0632', 'R1702', 'W0212', 'W0223', 'W0107', "R1720", 'R1732', 'C0209', 'C3001', - 'R1728'), + 'R1728', 'R1735'), skip=["categories_to_integers.py:174: W0640", "E0401: Unable to import 'mlinsights.mlmodel.piecewise_tree_regression_criterion", "setup.py:", @@ -31,7 +31,7 @@ def test_style_test(self): check_pep8(test, fLOG=fLOG, neg_pattern="temp_.*", pylint_ignore=('C0103', 'C1801', 'R1705', 'W0108', 'W0613', 'C0111', 'W0107', 'C0111', 'R1702', 'C0415', "R1720", - 'R1732', 'C0209', 'C3001', 'R1728'), + 'R1732', 'C0209', 'C3001', 'R1728', 'R1735'), skip=["Instance of 'tuple' has no", "[E402] module level import", "E0611: No name '_test_criterion_", diff --git a/requirements.txt b/requirements.txt index e90d90da..9b725298 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ chardet coverage cpyquickhelper>=0.3 cython +ipython joblib jupyter_sphinx>=0.2 jyquickhelper From ccb580d079212c17fdc22138a263c718be2898f0 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 16:34:47 +0200 Subject: [PATCH 13/17] lint --- _unittests/ut_helpers/test_debug.py | 4 ++-- _unittests/ut_mlmodel/test_quantile_regression.py | 2 +- azure-pipelines.yml | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/_unittests/ut_helpers/test_debug.py b/_unittests/ut_helpers/test_debug.py index 270f8764..c32debf7 100644 --- a/_unittests/ut_helpers/test_debug.py +++ b/_unittests/ut_helpers/test_debug.py @@ -38,7 +38,7 @@ def test_union_features_reg(self): self.assertNotIn(" object at 0x", text) self.assertIn(") -> (", text) else: - raise Exception("should not be the case") + raise AssertionError("should not be the case") def test_union_features_cl(self): data = numpy.random.randn(4, 5) @@ -60,7 +60,7 @@ def test_union_features_cl(self): self.assertNotIn(" object at 0x", text) self.assertIn(") -> (", text) else: - raise Exception("should not be the case") + raise AssertionError("should not be the case") if __name__ == "__main__": diff --git a/_unittests/ut_mlmodel/test_quantile_regression.py b/_unittests/ut_mlmodel/test_quantile_regression.py index fb80b5a5..b60b261e 100644 --- a/_unittests/ut_mlmodel/test_quantile_regression.py +++ b/_unittests/ut_mlmodel/test_quantile_regression.py @@ -216,7 +216,7 @@ def test_quantile_regression_diff_quantile(self): def test_quantile_regression_quantile_check(self): n = 100 - X = (numpy.arange(n) / n) + X = numpy.arange(n) / n Y = X + X * X / n X = X.reshape((n, 1)) for q in [0.1, 0.5, 0.9]: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8b761af2..11a6d8b1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -4,8 +4,8 @@ jobs: vmImage: 'ubuntu-latest' strategy: matrix: - Python310-Linux: - python.version: '3.10' + Python311-Linux: + python.version: '3.11' maxParallel: 3 steps: - task: UsePythonVersion@0 @@ -64,8 +64,8 @@ jobs: vmImage: 'windows-latest' strategy: matrix: - Python310-Windows: - python.version: '3.10' + Python311-Windows: + python.version: '3.11' maxParallel: 3 steps: - task: UsePythonVersion@0 From 83144bcc296abcfcff9f0d6d4ea71457687ac0a0 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 16:43:25 +0200 Subject: [PATCH 14/17] setup --- .circleci/config.yml | 2 +- appveyor.yml | 2 +- azure-pipelines.yml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c97c4bfc..acd79c92 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -46,7 +46,7 @@ jobs: - run: name: Install standard libraries command: | - python -m pip install scipy matplotlib numpy cython pandas + python -m pip install scipy matplotlib numpy cython pandas pyquicksetup - run: name: install dependencies diff --git a/appveyor.yml b/appveyor.yml index 641fc25d..256ffa86 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,7 +12,7 @@ init: install: - "%PYTHON%\\python -m pip install --upgrade pip" # for many packages - - "%PYTHON%\\Scripts\\pip install llvmlite numba" + - "%PYTHON%\\Scripts\\pip install llvmlite numba pyquicksetup" # install precompiled versions not available on pypi - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" # other dependencies diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 11a6d8b1..e24deb79 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -33,7 +33,7 @@ jobs: displayName: 'Install 7z, rar' - script: sudo apt-get install -y graphviz displayName: 'Install Graphviz' - - script: pip install --upgrade pip setuptools wheel + - script: pip install --upgrade pip setuptools wheel pyquicksetup displayName: 'Install tools' - script: pip install numpy displayName: 'Install numpy' @@ -72,7 +72,7 @@ jobs: inputs: versionSpec: '$(python.version)' architecture: 'x64' - - script: python -m pip install --upgrade pip setuptools wheel + - script: python -m pip install --upgrade pip setuptools wheel pyquicksetup displayName: 'Install tools' - script: pip install -r requirements.txt displayName: 'Install Requirements' @@ -132,7 +132,7 @@ jobs: # continueOnError: true # displayName: 'Install latex' - bash: conda install -y -c conda-forge numpy scipy - displayName: Install numpy scipy + displayName: Install numpy scipy pyquicksetup - bash: conda install -y -c conda-forge llvmlite numba displayName: Install llvmlite numba - bash: conda install -y -c conda-forge pyproj cartopy shapely From 518578bfc0131643f7f6903cf811f98f8389000b Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 16:56:10 +0200 Subject: [PATCH 15/17] requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 9b725298..12b499c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ llvmlite matplotlib memory_profiler>=0.55 nbconvert>=6.0.2 +notebook numba numpy onnx @@ -30,4 +31,5 @@ sphinx>=3.0 sphinxcontrib.imagesvg sphinx_gallery tqdm +traitlets wheel From 039ec982830cc91998fafdf0de1a739293bae77c Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 17:02:45 +0200 Subject: [PATCH 16/17] requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 12b499c7..f20aaf9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,7 @@ skl2onnx sphinx>=3.0 sphinxcontrib.imagesvg sphinx_gallery +stack-data tqdm traitlets wheel From ff6aa0ae183a53c09120b97d1511386b05d38a4d Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Sun, 2 Jul 2023 17:07:50 +0200 Subject: [PATCH 17/17] remove appveyor --- appveyor.yml | 34 ---------------------------------- requirements.txt | 2 -- 2 files changed, 36 deletions(-) delete mode 100644 appveyor.yml diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 256ffa86..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,34 +0,0 @@ -image: - - Visual Studio 2019 -environment: - matrix: - - PYTHON: "C:\\Python310-x64" - PYTHON_VERSION: "3.10.x" - PYTHON_ARCH: "64" - SKL: '>=1.0' -init: - - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%" - -install: - - "%PYTHON%\\python -m pip install --upgrade pip" - # for many packages - - "%PYTHON%\\Scripts\\pip install llvmlite numba pyquicksetup" - # install precompiled versions not available on pypi - - "%PYTHON%\\Scripts\\pip install torch torchvision torchaudio" - # other dependencies - - "%PYTHON%\\Scripts\\pip install -r requirements.txt --no-deps" - - "%PYTHON%\\Scripts\\pip install scikit-learn%SKL%" -build: off - -before_test: - - "%PYTHON%\\python -u setup.py build_ext --inplace --verbose" - -test_script: - - "%PYTHON%\\python -u setup.py unittests" - -after_test: - - "%PYTHON%\\python -u setup.py bdist_wheel" - -artifacts: - - path: dist - name: mlinsights diff --git a/requirements.txt b/requirements.txt index f20aaf9c..6f98ca03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,7 +30,5 @@ skl2onnx sphinx>=3.0 sphinxcontrib.imagesvg sphinx_gallery -stack-data tqdm -traitlets wheel