From 4686108cc8f93202b82477514df1936c6f494682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 18 Mar 2021 00:57:44 +0100 Subject: [PATCH 01/11] Fixes #95, PiecewiseRegressor, makes sure target are vectors --- .../ut_mlmodel/test_piecewise_regressor.py | 19 +++++++++++++++++++ mlinsights/mlmodel/piecewise_estimator.py | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/_unittests/ut_mlmodel/test_piecewise_regressor.py b/_unittests/ut_mlmodel/test_piecewise_regressor.py index 41fbdc65..33a58ea2 100644 --- a/_unittests/ut_mlmodel/test_piecewise_regressor.py +++ b/_unittests/ut_mlmodel/test_piecewise_regressor.py @@ -8,6 +8,8 @@ import pandas from sklearn.linear_model import LinearRegression from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from sklearn.datasets import make_regression +from sklearn.tree import DecisionTreeRegressor from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv from mlinsights.mlmodel.piecewise_estimator import PiecewiseRegressor @@ -148,6 +150,23 @@ def test_piecewise_regressor_grid_search(self): self.assertGreater(res['score'], 0) self.assertLesser(res['score'], 1) + def test_piecewise_regressor_issue(self): + X, y = make_regression(10000, n_features=1, n_informative=1, + n_targets=1) + y = y.reshape((-1, 1)) + model = PiecewiseRegressor( + binner=DecisionTreeRegressor(min_samples_leaf=300)) + model.fit(X, y) + vvc = model.predict(X) + self.assertEqual(vvc.shape, (X.shape[0], )) + + def test_piecewise_regressor_raise(self): + X, y = make_regression(10000, n_features=2, n_informative=2, + n_targets=2) + model = PiecewiseRegressor( + binner=DecisionTreeRegressor(min_samples_leaf=300)) + self.assertRaise(lambda: model.fit(X, y), RuntimeError) + if __name__ == "__main__": unittest.main() diff --git a/mlinsights/mlmodel/piecewise_estimator.py b/mlinsights/mlmodel/piecewise_estimator.py index a843137d..608c3e83 100644 --- a/mlinsights/mlmodel/piecewise_estimator.py +++ b/mlinsights/mlmodel/piecewise_estimator.py @@ -223,6 +223,12 @@ def fit(self, X, y, sample_weight=None): * `dim_`: dimension of the output * `mean_`: average targets """ + if len(y.shape) == 2: + if y.shape[-1] == 1: + y = y.ravel() + else: + raise RuntimeError( + "This regressor only works with single dimension targets.") if isinstance(X, pandas.DataFrame): X = X.values if isinstance(X, list): From 851b8eb21abb7c1e150b4a3f2d2135f7519caf3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 18 Mar 2021 01:33:19 +0100 Subject: [PATCH 02/11] lint --- _unittests/ut_mlmodel/test_piecewise_regressor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_unittests/ut_mlmodel/test_piecewise_regressor.py b/_unittests/ut_mlmodel/test_piecewise_regressor.py index 33a58ea2..c39d4974 100644 --- a/_unittests/ut_mlmodel/test_piecewise_regressor.py +++ b/_unittests/ut_mlmodel/test_piecewise_regressor.py @@ -7,9 +7,9 @@ from numpy.random import random import pandas from sklearn.linear_model import LinearRegression -from pyquickhelper.pycode import ExtTestCase, ignore_warnings from sklearn.datasets import make_regression from sklearn.tree import DecisionTreeRegressor +from pyquickhelper.pycode import ExtTestCase, ignore_warnings from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv from mlinsights.mlmodel.piecewise_estimator import PiecewiseRegressor @@ -151,7 +151,7 @@ def test_piecewise_regressor_grid_search(self): self.assertLesser(res['score'], 1) def test_piecewise_regressor_issue(self): - X, y = make_regression(10000, n_features=1, n_informative=1, + X, y = make_regression(10000, n_features=1, n_informative=1, # pylint: disable=W0632 n_targets=1) y = y.reshape((-1, 1)) model = PiecewiseRegressor( @@ -161,7 +161,7 @@ def test_piecewise_regressor_issue(self): self.assertEqual(vvc.shape, (X.shape[0], )) def test_piecewise_regressor_raise(self): - X, y = make_regression(10000, n_features=2, n_informative=2, + X, y = make_regression(10000, n_features=2, n_informative=2, # pylint: disable=W0632 n_targets=2) model = PiecewiseRegressor( binner=DecisionTreeRegressor(min_samples_leaf=300)) From 00465d28d089063abc0b80191bfd3c2e07f1c982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 15 Apr 2021 23:49:25 +0200 Subject: [PATCH 03/11] refactor setup.py --- .local.jenkins.lin.yml | 4 +- requirements.txt | 1 + setup.py | 293 ++++++++++++----------------------------- 3 files changed, 86 insertions(+), 212 deletions(-) diff --git a/.local.jenkins.lin.yml b/.local.jenkins.lin.yml index af24dc0a..659259ed 100644 --- a/.local.jenkins.lin.yml +++ b/.local.jenkins.lin.yml @@ -20,7 +20,7 @@ before_script: - $PYINT -u setup.py build_ext --inplace script: - - { CMD: "$PYINT -u setup.py unittests", NAME: "UT" } + - { CMD: "$PYINT -u setup.py unittests --covtoken=1ac0b95d-6722-4f29-804a-e4e0d5295497", NAME: "UT" } - { CMD: "$PYINT -u setup.py unittests_LONG", NAME: "UT_DEEP_LONG" } - { CMD: "$PYINT -u setup.py unittests_SKIP", NAME: "UT_SKIP" } @@ -29,5 +29,5 @@ after_script: - if [ ${NAME} == "UT" ] then cp dist/*.whl {{root_path}}/../local_pypi/local_pypi_server fi documentation: - - if [ ${NAME} == "UT" ] then $PYINT -u setup.py build_sphinx fi + - if [ ${NAME} == "UT" ] then $PYINT -u setup.py build_sphinx --layout=html fi - if [ ${NAME} == "UT" ] then cp -R -f _doc/sphinxdoc/build/html dist/html fi diff --git a/requirements.txt b/requirements.txt index d7c43b68..a90bd2c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ pandas_streaming pybind11 pycodestyle pyquickhelper>=1.10 +pyquicksetup pylint>=2.4.3 scikit-learn>=0.22.1 scipy diff --git a/setup.py b/setup.py index 4390b7f1..f480a69d 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,7 @@ import os import warnings from setuptools import setup, Extension, find_packages +from pyquicksetup import read_version, read_readme, default_cmdclass ######### # settings @@ -37,215 +38,87 @@ project_var_name + ".mlmodel": ["*.pxd", "*.pyx"], } -############ -# functions -############ - - -def ask_help(): - return "--help" in sys.argv or "--help-commands" in sys.argv - - -def is_local(): - file = os.path.abspath(__file__).replace("\\", "/").lower() - if "/temp/" in file and "pip-" in file: - return False - try: - from pyquickhelper.pycode.setup_helper import available_commands_list - except ImportError: - return False - return available_commands_list(sys.argv) - - -def verbose(): - print("---------------------------------") - print("package_dir =", package_dir) - print("packages =", packages) - print("package_data=", package_data) - print("current =", os.path.abspath(os.getcwd())) - print("---------------------------------") - -########## -# version -########## - - -if is_local() and not ask_help(): - def write_version(): - from pyquickhelper.pycode import write_version_for_setup - return write_version_for_setup(__file__) - - try: - write_version() - subversion = None - except Exception: - subversion = "" - - if subversion is None: - versiontxt = os.path.join(os.path.dirname(__file__), "version.txt") - if os.path.exists(versiontxt): - with open(versiontxt, "r") as f: - lines = f.readlines() - subversion = "." + lines[0].strip("\r\n ") - if subversion == ".0": - raise Exception( - "Git version is wrong: '{0}'.".format(subversion)) - else: - subversion = "" -else: - # when the module is installed, no commit number is displayed - subversion = "" - -if "upload" in sys.argv and not subversion and not ask_help(): - # avoid uploading with a wrong subversion number - raise Exception( - "Git version is empty, cannot upload, is_local()={0}".format(is_local())) - -############## -# common part -############## - -if os.path.exists(readme): - with open(readme, "r", encoding='utf-8-sig') as f: - long_description = f.read() -else: - long_description = "" -if os.path.exists(history): - with open(history, "r", encoding='utf-8-sig') as f: - long_description += f.read() - -if "--verbose" in sys.argv: - verbose() - -build_commmands = {"bdist_msi", "sdist", - "bdist_wheel", "publish", "publish_doc", "register", - "upload_docs", "bdist_wininst", "build_ext"} - -if is_local(): - import pyquickhelper - logging_function = pyquickhelper.get_fLOG() - logging_function(OutputPrint=True) - must_build, run_build_ext = pyquickhelper.get_insetup_functions() - - if must_build() and not ask_help(): - out = run_build_ext(__file__) - print(out) - - from pyquickhelper.pycode import process_standard_options_for_setup - r = process_standard_options_for_setup( - sys.argv, __file__, project_var_name, - unittest_modules=["pyquickhelper"], - additional_notebook_path=["pyquickhelper", "cpyquickhelper", - "jyquickhelper", "pandas_streaming"], - additional_local_path=["pyquickhelper", "cpyquickhelper", - "jyquickhelper", "pandas_streaming"], - requirements=["pyquickhelper", "jyquickhelper", "pandas_streaming"], - layout=["html"], - add_htmlhelp=sys.platform.startswith("win"), - coverage_options=dict(omit=["*exclude*.py"]), - fLOG=logging_function, github_owner='sdpython', - covtoken=("1ac0b95d-6722-4f29-804a-e4e0d5295497", "'_UT_39_std' in outfile")) - if not r and not (build_commmands & set(sys.argv)): - raise Exception("unable to interpret command line: " + str(sys.argv)) -else: - r = False - -if r and ask_help(): - from pyquickhelper.pycode import process_standard_options_for_setup_help - process_standard_options_for_setup_help(sys.argv) - -if not r: - if len(sys.argv) in (1, 2) and sys.argv[-1] in ("--help-commands",): - from pyquickhelper.pycode import process_standard_options_for_setup_help - process_standard_options_for_setup_help(sys.argv) - - try: - from pyquickhelper.pycode import clean_readme - long_description = clean_readme(long_description) - except ImportError: - long_description = "" - - from mlinsights import __version__ as sversion - - def get_extensions(): - root = os.path.abspath(os.path.dirname(__file__)) - if sys.platform.startswith("win"): - extra_compile_args = None - else: - extra_compile_args = ['-std=c++11'] - - ext_modules = [] - - # mlmodel - import sklearn - extensions = ["direct_blas_lapack"] - spl = sklearn.__version__.split('.') - vskl = (int(spl[0]), int(spl[1])) - if vskl >= (0, 24): - extensions.append(("_piecewise_tree_regression_common", - "_piecewise_tree_regression_common024")) +def get_extensions(): + root = os.path.abspath(os.path.dirname(__file__)) + if sys.platform.startswith("win"): + extra_compile_args = None + else: + extra_compile_args = ['-std=c++11'] + + ext_modules = [] + + # mlmodel + + import sklearn + extensions = ["direct_blas_lapack"] + spl = sklearn.__version__.split('.') + vskl = (int(spl[0]), int(spl[1])) + if vskl >= (0, 24): + extensions.append(("_piecewise_tree_regression_common", + "_piecewise_tree_regression_common024")) + else: + extensions.append(("_piecewise_tree_regression_common", + "_piecewise_tree_regression_common023")) + extensions.extend([ + "piecewise_tree_regression_criterion", + "piecewise_tree_regression_criterion_linear", + "piecewise_tree_regression_criterion_fast", + ]) + + pattern1 = "mlinsights.mlmodel.%s" + import numpy + for name in extensions: + if isinstance(name, tuple): + m = Extension(pattern1 % name[0], + ['mlinsights/mlmodel/%s.pyx' % name[1]], + include_dirs=[numpy.get_include()], + extra_compile_args=["-O3"], + language='c') else: - extensions.append(("_piecewise_tree_regression_common", - "_piecewise_tree_regression_common023")) - extensions.extend([ - "piecewise_tree_regression_criterion", - "piecewise_tree_regression_criterion_linear", - "piecewise_tree_regression_criterion_fast", - ]) - - pattern1 = "mlinsights.mlmodel.%s" - import numpy - for name in extensions: - if isinstance(name, tuple): - m = Extension(pattern1 % name[0], - ['mlinsights/mlmodel/%s.pyx' % name[1]], - include_dirs=[numpy.get_include()], - extra_compile_args=["-O3"], - language='c') - else: - m = Extension(pattern1 % name, - ['mlinsights/mlmodel/%s.pyx' % name], - include_dirs=[numpy.get_include()], - extra_compile_args=["-O3"], - language='c') - ext_modules.append(m) - - # cythonize - from Cython.Build import cythonize - opts = dict(boundscheck=False, cdivision=True, - wraparound=False, language_level=3, - cdivision_warnings=False, embedsignature=True, - initializedcheck=False) - ext_modules = cythonize(ext_modules, compiler_directives=opts) - return ext_modules - - try: - ext_modules = get_extensions() - except ImportError as e: - warnings.warn( - "Unable to build C++ extension with missing dependencies %r." % e) - ext_modules = None - - # setup - - setup( - name=project_var_name, - version=sversion, - author='Xavier Dupré', - author_email='xavier.dupre@gmail.com', - license="MIT", - url="http://www.xavierdupre.fr/app/%s/helpsphinx/index.html" % project_var_name, - download_url="https://github.com/sdpython/%s/" % project_var_name, - description=DESCRIPTION, - long_description=long_description, - keywords=KEYWORDS, - classifiers=CLASSIFIERS, - packages=packages, - package_dir=package_dir, - package_data=package_data, - setup_requires=["pyquickhelper", 'cython', 'scipy', 'scikit-learn'], - install_requires=['cython', 'scikit-learn>=0.22.1', 'pandas', 'scipy', - 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], - ext_modules=ext_modules, # cythonize(ext_modules), - ) + m = Extension(pattern1 % name, + ['mlinsights/mlmodel/%s.pyx' % name], + include_dirs=[numpy.get_include()], + extra_compile_args=["-O3"], + language='c') + ext_modules.append(m) + + # cythonize + from Cython.Build import cythonize + opts = dict(boundscheck=False, cdivision=True, + wraparound=False, language_level=3, + cdivision_warnings=False, embedsignature=True, + initializedcheck=False) + ext_modules = cythonize(ext_modules, compiler_directives=opts) + return ext_modules + +try: + ext_modules = get_extensions() +except ImportError as e: + warnings.warn( + "Unable to build C++ extension with missing dependencies %r." % e) + ext_modules = None + +# setup + +setup( + name=project_var_name, + version=read_version(__file__, project_var_name), + author='Xavier Dupré', + author_email='xavier.dupre@gmail.com', + license="MIT", + url="http://www.xavierdupre.fr/app/%s/helpsphinx/index.html" % project_var_name, + download_url="https://github.com/sdpython/%s/" % project_var_name, + description=DESCRIPTION, + long_description=read_readme(__file__), + cmdclass=default_cmdclass(), + keywords=KEYWORDS, + classifiers=CLASSIFIERS, + packages=packages, + package_dir=package_dir, + package_data=package_data, + setup_requires=["pyquicksetup", 'cython', 'scipy', 'scikit-learn'], + install_requires=['cython', 'scikit-learn>=0.22.1', 'pandas', 'scipy', + 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], + ext_modules=ext_modules, # cythonize(ext_modules), +) From c1469de3594646dc3b7f6f172db9d3ac4fe34cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 15 Apr 2021 23:49:42 +0200 Subject: [PATCH 04/11] Revert "refactor setup.py" This reverts commit 00465d28d089063abc0b80191bfd3c2e07f1c982. --- .local.jenkins.lin.yml | 4 +- requirements.txt | 1 - setup.py | 293 +++++++++++++++++++++++++++++------------ 3 files changed, 212 insertions(+), 86 deletions(-) diff --git a/.local.jenkins.lin.yml b/.local.jenkins.lin.yml index 659259ed..af24dc0a 100644 --- a/.local.jenkins.lin.yml +++ b/.local.jenkins.lin.yml @@ -20,7 +20,7 @@ before_script: - $PYINT -u setup.py build_ext --inplace script: - - { CMD: "$PYINT -u setup.py unittests --covtoken=1ac0b95d-6722-4f29-804a-e4e0d5295497", NAME: "UT" } + - { CMD: "$PYINT -u setup.py unittests", NAME: "UT" } - { CMD: "$PYINT -u setup.py unittests_LONG", NAME: "UT_DEEP_LONG" } - { CMD: "$PYINT -u setup.py unittests_SKIP", NAME: "UT_SKIP" } @@ -29,5 +29,5 @@ after_script: - if [ ${NAME} == "UT" ] then cp dist/*.whl {{root_path}}/../local_pypi/local_pypi_server fi documentation: - - if [ ${NAME} == "UT" ] then $PYINT -u setup.py build_sphinx --layout=html fi + - if [ ${NAME} == "UT" ] then $PYINT -u setup.py build_sphinx fi - if [ ${NAME} == "UT" ] then cp -R -f _doc/sphinxdoc/build/html dist/html fi diff --git a/requirements.txt b/requirements.txt index a90bd2c5..d7c43b68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,6 @@ pandas_streaming pybind11 pycodestyle pyquickhelper>=1.10 -pyquicksetup pylint>=2.4.3 scikit-learn>=0.22.1 scipy diff --git a/setup.py b/setup.py index f480a69d..4390b7f1 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,6 @@ import os import warnings from setuptools import setup, Extension, find_packages -from pyquicksetup import read_version, read_readme, default_cmdclass ######### # settings @@ -38,87 +37,215 @@ project_var_name + ".mlmodel": ["*.pxd", "*.pyx"], } +############ +# functions +############ + + +def ask_help(): + return "--help" in sys.argv or "--help-commands" in sys.argv + + +def is_local(): + file = os.path.abspath(__file__).replace("\\", "/").lower() + if "/temp/" in file and "pip-" in file: + return False + try: + from pyquickhelper.pycode.setup_helper import available_commands_list + except ImportError: + return False + return available_commands_list(sys.argv) + + +def verbose(): + print("---------------------------------") + print("package_dir =", package_dir) + print("packages =", packages) + print("package_data=", package_data) + print("current =", os.path.abspath(os.getcwd())) + print("---------------------------------") + +########## +# version +########## + + +if is_local() and not ask_help(): + def write_version(): + from pyquickhelper.pycode import write_version_for_setup + return write_version_for_setup(__file__) + + try: + write_version() + subversion = None + except Exception: + subversion = "" + + if subversion is None: + versiontxt = os.path.join(os.path.dirname(__file__), "version.txt") + if os.path.exists(versiontxt): + with open(versiontxt, "r") as f: + lines = f.readlines() + subversion = "." + lines[0].strip("\r\n ") + if subversion == ".0": + raise Exception( + "Git version is wrong: '{0}'.".format(subversion)) + else: + subversion = "" +else: + # when the module is installed, no commit number is displayed + subversion = "" + +if "upload" in sys.argv and not subversion and not ask_help(): + # avoid uploading with a wrong subversion number + raise Exception( + "Git version is empty, cannot upload, is_local()={0}".format(is_local())) + +############## +# common part +############## + +if os.path.exists(readme): + with open(readme, "r", encoding='utf-8-sig') as f: + long_description = f.read() +else: + long_description = "" +if os.path.exists(history): + with open(history, "r", encoding='utf-8-sig') as f: + long_description += f.read() + +if "--verbose" in sys.argv: + verbose() + +build_commmands = {"bdist_msi", "sdist", + "bdist_wheel", "publish", "publish_doc", "register", + "upload_docs", "bdist_wininst", "build_ext"} + +if is_local(): + import pyquickhelper + logging_function = pyquickhelper.get_fLOG() + logging_function(OutputPrint=True) + must_build, run_build_ext = pyquickhelper.get_insetup_functions() + + if must_build() and not ask_help(): + out = run_build_ext(__file__) + print(out) + + from pyquickhelper.pycode import process_standard_options_for_setup + r = process_standard_options_for_setup( + sys.argv, __file__, project_var_name, + unittest_modules=["pyquickhelper"], + additional_notebook_path=["pyquickhelper", "cpyquickhelper", + "jyquickhelper", "pandas_streaming"], + additional_local_path=["pyquickhelper", "cpyquickhelper", + "jyquickhelper", "pandas_streaming"], + requirements=["pyquickhelper", "jyquickhelper", "pandas_streaming"], + layout=["html"], + add_htmlhelp=sys.platform.startswith("win"), + coverage_options=dict(omit=["*exclude*.py"]), + fLOG=logging_function, github_owner='sdpython', + covtoken=("1ac0b95d-6722-4f29-804a-e4e0d5295497", "'_UT_39_std' in outfile")) + if not r and not (build_commmands & set(sys.argv)): + raise Exception("unable to interpret command line: " + str(sys.argv)) +else: + r = False + +if r and ask_help(): + from pyquickhelper.pycode import process_standard_options_for_setup_help + process_standard_options_for_setup_help(sys.argv) + +if not r: + if len(sys.argv) in (1, 2) and sys.argv[-1] in ("--help-commands",): + from pyquickhelper.pycode import process_standard_options_for_setup_help + process_standard_options_for_setup_help(sys.argv) + + try: + from pyquickhelper.pycode import clean_readme + long_description = clean_readme(long_description) + except ImportError: + long_description = "" + + from mlinsights import __version__ as sversion + + def get_extensions(): + root = os.path.abspath(os.path.dirname(__file__)) + if sys.platform.startswith("win"): + extra_compile_args = None + else: + extra_compile_args = ['-std=c++11'] + + ext_modules = [] + + # mlmodel -def get_extensions(): - root = os.path.abspath(os.path.dirname(__file__)) - if sys.platform.startswith("win"): - extra_compile_args = None - else: - extra_compile_args = ['-std=c++11'] - - ext_modules = [] - - # mlmodel - - import sklearn - extensions = ["direct_blas_lapack"] - spl = sklearn.__version__.split('.') - vskl = (int(spl[0]), int(spl[1])) - if vskl >= (0, 24): - extensions.append(("_piecewise_tree_regression_common", - "_piecewise_tree_regression_common024")) - else: - extensions.append(("_piecewise_tree_regression_common", - "_piecewise_tree_regression_common023")) - extensions.extend([ - "piecewise_tree_regression_criterion", - "piecewise_tree_regression_criterion_linear", - "piecewise_tree_regression_criterion_fast", - ]) - - pattern1 = "mlinsights.mlmodel.%s" - import numpy - for name in extensions: - if isinstance(name, tuple): - m = Extension(pattern1 % name[0], - ['mlinsights/mlmodel/%s.pyx' % name[1]], - include_dirs=[numpy.get_include()], - extra_compile_args=["-O3"], - language='c') + import sklearn + extensions = ["direct_blas_lapack"] + spl = sklearn.__version__.split('.') + vskl = (int(spl[0]), int(spl[1])) + if vskl >= (0, 24): + extensions.append(("_piecewise_tree_regression_common", + "_piecewise_tree_regression_common024")) else: - m = Extension(pattern1 % name, - ['mlinsights/mlmodel/%s.pyx' % name], - include_dirs=[numpy.get_include()], - extra_compile_args=["-O3"], - language='c') - ext_modules.append(m) - - # cythonize - from Cython.Build import cythonize - opts = dict(boundscheck=False, cdivision=True, - wraparound=False, language_level=3, - cdivision_warnings=False, embedsignature=True, - initializedcheck=False) - ext_modules = cythonize(ext_modules, compiler_directives=opts) - return ext_modules - -try: - ext_modules = get_extensions() -except ImportError as e: - warnings.warn( - "Unable to build C++ extension with missing dependencies %r." % e) - ext_modules = None - -# setup - -setup( - name=project_var_name, - version=read_version(__file__, project_var_name), - author='Xavier Dupré', - author_email='xavier.dupre@gmail.com', - license="MIT", - url="http://www.xavierdupre.fr/app/%s/helpsphinx/index.html" % project_var_name, - download_url="https://github.com/sdpython/%s/" % project_var_name, - description=DESCRIPTION, - long_description=read_readme(__file__), - cmdclass=default_cmdclass(), - keywords=KEYWORDS, - classifiers=CLASSIFIERS, - packages=packages, - package_dir=package_dir, - package_data=package_data, - setup_requires=["pyquicksetup", 'cython', 'scipy', 'scikit-learn'], - install_requires=['cython', 'scikit-learn>=0.22.1', 'pandas', 'scipy', - 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], - ext_modules=ext_modules, # cythonize(ext_modules), -) + extensions.append(("_piecewise_tree_regression_common", + "_piecewise_tree_regression_common023")) + extensions.extend([ + "piecewise_tree_regression_criterion", + "piecewise_tree_regression_criterion_linear", + "piecewise_tree_regression_criterion_fast", + ]) + + pattern1 = "mlinsights.mlmodel.%s" + import numpy + for name in extensions: + if isinstance(name, tuple): + m = Extension(pattern1 % name[0], + ['mlinsights/mlmodel/%s.pyx' % name[1]], + include_dirs=[numpy.get_include()], + extra_compile_args=["-O3"], + language='c') + else: + m = Extension(pattern1 % name, + ['mlinsights/mlmodel/%s.pyx' % name], + include_dirs=[numpy.get_include()], + extra_compile_args=["-O3"], + language='c') + ext_modules.append(m) + + # cythonize + from Cython.Build import cythonize + opts = dict(boundscheck=False, cdivision=True, + wraparound=False, language_level=3, + cdivision_warnings=False, embedsignature=True, + initializedcheck=False) + ext_modules = cythonize(ext_modules, compiler_directives=opts) + return ext_modules + + try: + ext_modules = get_extensions() + except ImportError as e: + warnings.warn( + "Unable to build C++ extension with missing dependencies %r." % e) + ext_modules = None + + # setup + + setup( + name=project_var_name, + version=sversion, + author='Xavier Dupré', + author_email='xavier.dupre@gmail.com', + license="MIT", + url="http://www.xavierdupre.fr/app/%s/helpsphinx/index.html" % project_var_name, + download_url="https://github.com/sdpython/%s/" % project_var_name, + description=DESCRIPTION, + long_description=long_description, + keywords=KEYWORDS, + classifiers=CLASSIFIERS, + packages=packages, + package_dir=package_dir, + package_data=package_data, + setup_requires=["pyquickhelper", 'cython', 'scipy', 'scikit-learn'], + install_requires=['cython', 'scikit-learn>=0.22.1', 'pandas', 'scipy', + 'matplotlib', 'pandas_streaming', 'numpy>=1.16'], + ext_modules=ext_modules, # cythonize(ext_modules), + ) From 93bf2c29d6480788916a0f892f068ffc9faf252a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 13 May 2021 23:05:09 +0200 Subject: [PATCH 05/11] documentation --- _doc/sphinxdoc/source/conf.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py index 8431a1d0..0fb4c5bb 100644 --- a/_doc/sphinxdoc/source/conf.py +++ b/_doc/sphinxdoc/source/conf.py @@ -20,8 +20,12 @@ title="mlinsights", book=True) blog_root = "http://www.xavierdupre.fr/app/mlinsights/helpsphinx/" -html_context = {'css_files': get_default_stylesheet() + - ['_static/my-styles.css']} + +html_context = { + 'css_files': get_default_stylesheet([ + '_static/my-styles.css', '_static/gallery.css']), +} + html_logo = "phdoc_static/project_ico.png" html_sidebars = {} language = "en" From 47a08f677aeeed6469be7e861d1c2a8b121daea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 14 May 2021 00:41:22 +0200 Subject: [PATCH 06/11] code coverage --- mlinsights/mlmodel/_kmeans_022.py | 2 +- mlinsights/mlmodel/_kmeans_constraint_.py | 3 ++- mlinsights/mlmodel/decision_tree_logreg.py | 2 +- mlinsights/mlmodel/extended_features.py | 7 ++++--- mlinsights/mlmodel/piecewise_estimator.py | 2 +- mlinsights/mlmodel/sklearn_transform_inv_fct.py | 17 ++++++++++------- mlinsights/mlmodel/target_predictors.py | 2 +- mlinsights/mltree/tree_structure.py | 3 ++- 8 files changed, 22 insertions(+), 16 deletions(-) diff --git a/mlinsights/mlmodel/_kmeans_022.py b/mlinsights/mlmodel/_kmeans_022.py index 51a1ae28..acc4a0fd 100644 --- a/mlinsights/mlmodel/_kmeans_022.py +++ b/mlinsights/mlmodel/_kmeans_022.py @@ -9,7 +9,7 @@ from sklearn.utils.sparsefuncs_fast import assign_rows_csr # pylint: disable=W0611,E0611 try: from sklearn.cluster._kmeans import _check_sample_weight -except ImportError: +except ImportError: # pragma: no cover from sklearn.cluster._kmeans import ( _check_normalize_sample_weight as _check_sample_weight) from sklearn.metrics.pairwise import pairwise_distances_argmin_min diff --git a/mlinsights/mlmodel/_kmeans_constraint_.py b/mlinsights/mlmodel/_kmeans_constraint_.py index 2f170a6d..764d8e83 100644 --- a/mlinsights/mlmodel/_kmeans_constraint_.py +++ b/mlinsights/mlmodel/_kmeans_constraint_.py @@ -234,7 +234,8 @@ def _compute_strategy_coefficient(distances, strategy, labels): ar = numpy.arange(distances.shape[0]) dist = distances[ar, labels] return distances - dist[:, numpy.newaxis] - raise ValueError("Unknwon strategy '{0}'.".format(strategy)) + raise ValueError( # pragma: no cover + "Unknwon strategy '{0}'.".format(strategy)) def _randomize_index(index, weights): diff --git a/mlinsights/mlmodel/decision_tree_logreg.py b/mlinsights/mlmodel/decision_tree_logreg.py index eb6c2be8..8f6918c6 100644 --- a/mlinsights/mlmodel/decision_tree_logreg.py +++ b/mlinsights/mlmodel/decision_tree_logreg.py @@ -422,7 +422,7 @@ def _fit_parallel(self, X, y, sample_weight): def _fit_perpendicular(self, X, y, sample_weight): "Implements the perpendicular strategy." - raise NotImplementedError() + raise NotImplementedError() # pragma: no cover def predict(self, X): """ diff --git a/mlinsights/mlmodel/extended_features.py b/mlinsights/mlmodel/extended_features.py index de0edc6c..fcaf8fcf 100644 --- a/mlinsights/mlmodel/extended_features.py +++ b/mlinsights/mlmodel/extended_features.py @@ -71,7 +71,7 @@ def _get_feature_names_poly(self, input_features=None): input_features = ["x%d" % i for i in range(0, self.n_input_features_)] elif len(input_features) != self.n_input_features_: - raise ValueError( + raise ValueError( # pragma: no cover "input_features should contain {} strings.".format( self.n_input_features_)) @@ -125,7 +125,7 @@ def fit(self, X, y=None): return self._fit_poly(X, y) elif self.kind == 'poly-slow': return self._fit_poly(X, y) - raise ValueError( + raise ValueError( # pragma: no cover "Unknown extended features '{}'.".format(self.kind)) def _fit_poly(self, X, y=None): @@ -148,7 +148,8 @@ def transform(self, X): """ n_features = X.shape[1] if n_features != self.n_input_features_: - raise ValueError("X shape does not match training shape") + raise ValueError( # pragma: no cover + "X shape does not match training shape") if self.kind == 'poly': return self._transform_poly(X) if self.kind == 'poly-slow': diff --git a/mlinsights/mlmodel/piecewise_estimator.py b/mlinsights/mlmodel/piecewise_estimator.py index 608c3e83..d56d31bc 100644 --- a/mlinsights/mlmodel/piecewise_estimator.py +++ b/mlinsights/mlmodel/piecewise_estimator.py @@ -13,7 +13,7 @@ from sklearn.utils.fixes import _joblib_parallel_args try: from tqdm import tqdm -except ImportError: +except ImportError: # pragma: no cover pass diff --git a/mlinsights/mlmodel/sklearn_transform_inv_fct.py b/mlinsights/mlmodel/sklearn_transform_inv_fct.py index 3af89143..596eff93 100644 --- a/mlinsights/mlmodel/sklearn_transform_inv_fct.py +++ b/mlinsights/mlmodel/sklearn_transform_inv_fct.py @@ -46,12 +46,13 @@ def __init__(self, fct, fct_inv=None): BaseReciprocalTransformer.__init__(self) if isinstance(fct, str): if fct_inv is not None: - raise ValueError( + raise ValueError( # pragma: no cover "If fct is a function name, fct_inv must not be specified.") opts = self.__class__.available_fcts() if fct not in opts: - raise ValueError("Unknown fct '{}', it should in {}.".format( - fct, list(sorted(opts)))) + raise ValueError( # pragma: no cover + "Unknown fct '{}', it should in {}.".format( + fct, list(sorted(opts)))) else: if fct_inv is None: raise ValueError( @@ -117,7 +118,8 @@ def fit(self, X=None, y=None, sample_weight=None): Defines a random permutation over the targets. """ if y is None: - raise RuntimeError("targets cannot be empty.") + raise RuntimeError( # pragma: no cover + "targets cannot be empty.") num = numpy.issubdtype(y.dtype, numpy.floating) perm = {} for u in y.ravel(): @@ -141,7 +143,7 @@ def fit(self, X=None, y=None, sample_weight=None): def _check_is_fitted(self): if not hasattr(self, 'permutation_'): - raise NotFittedError( + raise NotFittedError( # pragma: no cover "This instance {} is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.".format( type(self))) @@ -169,8 +171,9 @@ def _find_closest(self, cl): return float(res) if self.knn_perm_.dtype in (numpy.int32, numpy.int64): return int(res) - raise NotImplementedError("The function does not work for type {}.".format( - self.knn_perm_.dtype)) + raise NotImplementedError( # pragma: no cover + "The function does not work for type {}.".format( + self.knn_perm_.dtype)) def transform(self, X, y): """ diff --git a/mlinsights/mlmodel/target_predictors.py b/mlinsights/mlmodel/target_predictors.py index 8646ce91..ef65b2d9 100644 --- a/mlinsights/mlmodel/target_predictors.py +++ b/mlinsights/mlmodel/target_predictors.py @@ -109,7 +109,7 @@ def predict(self, X): Predicted values. """ if not hasattr(self, 'regressor_'): - raise NotFittedError( + raise NotFittedError( # pragma: no cover "This instance {} is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.".format( type(self))) diff --git a/mlinsights/mltree/tree_structure.py b/mlinsights/mltree/tree_structure.py index 4fe86edb..1f96517e 100644 --- a/mlinsights/mltree/tree_structure.py +++ b/mlinsights/mltree/tree_structure.py @@ -14,7 +14,8 @@ def _get_tree(obj): return obj if hasattr(obj, "tree_"): return obj.tree_ - raise AttributeError("obj is no tree: {}".format(type(obj))) + raise AttributeError( # pragma: no cover + "obj is no tree: {}".format(type(obj))) def tree_leave_index(model): From 47441d1030bb209bb9608aefea3bf0c11acda003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 26 May 2021 12:34:37 +0200 Subject: [PATCH 07/11] documentation --- _doc/{ => sphinxdoc/source}/pipeline.png | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename _doc/{ => sphinxdoc/source}/pipeline.png (100%) diff --git a/_doc/pipeline.png b/_doc/sphinxdoc/source/pipeline.png similarity index 100% rename from _doc/pipeline.png rename to _doc/sphinxdoc/source/pipeline.png From 4dccea905bd2a546be862c0bbe7c300d61096170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 27 May 2021 00:10:08 +0200 Subject: [PATCH 08/11] Update sklearn_testing.py --- mlinsights/mlmodel/sklearn_testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlinsights/mlmodel/sklearn_testing.py b/mlinsights/mlmodel/sklearn_testing.py index f8f9c879..ca6c9287 100644 --- a/mlinsights/mlmodel/sklearn_testing.py +++ b/mlinsights/mlmodel/sklearn_testing.py @@ -259,6 +259,8 @@ def test_sklearn_grid_search_cv(fct_model, X, y=None, sample_weight=None, **grid clf = GridSearchCV(pipe, parameters) if y_train is None and w_train is None: clf.fit(X_train) + elif w_train is None: + clf.fit(X_train, y_train) # pylint: disable=E1121 else: clf.fit(X_train, y_train, w_train) # pylint: disable=E1121 score = clf.score(X_test, y_test) From 7a5f6edd76a4ca521aa2e859249b39b235a14924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 27 May 2021 00:24:40 +0200 Subject: [PATCH 09/11] ut --- .travis.yml | 2 +- _unittests/ut_mlmodel/test_categories_to_integers.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 222d6028..12f535b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ matrix: - python: 3.9 name: "Py39-024+" env: - - sklver=">=0.24.0" + - sklver=">=0.24.2" - jlver=">=1.0" - python: 3.8 name: "Py38-023" diff --git a/_unittests/ut_mlmodel/test_categories_to_integers.py b/_unittests/ut_mlmodel/test_categories_to_integers.py index ff46011b..ab97c421 100644 --- a/_unittests/ut_mlmodel/test_categories_to_integers.py +++ b/_unittests/ut_mlmodel/test_categories_to_integers.py @@ -119,10 +119,11 @@ def test_categories_to_integers_grid_search(self): LogisticRegression()) self.assertRaise(lambda: test_sklearn_grid_search_cv( lambda: pipe, df), ValueError) - self.assertRaise( - lambda: test_sklearn_grid_search_cv( - lambda: pipe, X, y, categoriestointegers__single=[True, False]), - ValueError, "Unable to find category value") + if compare_module_version(sklver, "0.24") >= 0: + self.assertRaise( + lambda: test_sklearn_grid_search_cv( + lambda: pipe, X, y, categoriestointegers__single=[True, False]), + ValueError, "Unable to find category value") pipe = make_pipeline(CategoriesToIntegers(), Imputer(strategy='most_frequent'), LogisticRegression(n_jobs=1)) From b90475a709ed65c44525df88fa4f21899d492c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 27 May 2021 00:38:15 +0200 Subject: [PATCH 10/11] Update test_classification_kmeans.py --- _unittests/ut_mlmodel/test_classification_kmeans.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/_unittests/ut_mlmodel/test_classification_kmeans.py b/_unittests/ut_mlmodel/test_classification_kmeans.py index 6e227b20..7ae4e8f5 100644 --- a/_unittests/ut_mlmodel/test_classification_kmeans.py +++ b/_unittests/ut_mlmodel/test_classification_kmeans.py @@ -57,7 +57,12 @@ def test_classification_kmeans_intercept_weights(self): def test_classification_kmeans_pickle(self): iris = datasets.load_iris() X, y = iris.data, iris.target - test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y) + try: + test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y) + except AttributeError as e: + if compare_module_version(sklver, "0.24") < 0: + return + raise e def test_classification_kmeans_clone(self): self.maxDiff = None From 007c871f3f391c6c3b53760244607d78e864a2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 27 May 2021 02:28:18 +0200 Subject: [PATCH 11/11] documentation --- _doc/sphinxdoc/source/conf.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py index 0fb4c5bb..6f2b117c 100644 --- a/_doc/sphinxdoc/source/conf.py +++ b/_doc/sphinxdoc/source/conf.py @@ -5,7 +5,7 @@ import sys import os import alabaster -from pyquickhelper.helpgen.default_conf import set_sphinx_variables, get_default_stylesheet +from pyquickhelper.helpgen.default_conf import set_sphinx_variables sys.path.insert(0, os.path.abspath(os.path.join(os.path.split(__file__)[0]))) @@ -21,10 +21,7 @@ blog_root = "http://www.xavierdupre.fr/app/mlinsights/helpsphinx/" -html_context = { - 'css_files': get_default_stylesheet([ - '_static/my-styles.css', '_static/gallery.css']), -} +html_css_files = ['my-styles.css'] html_logo = "phdoc_static/project_ico.png" html_sidebars = {}