From ea9c544697aacd425de30cd6711435b91a78676b Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 21 Apr 2023 12:49:39 +0200 Subject: [PATCH 01/87] ENH: first draft of asarray for array_api --- scipy/_lib/_array_api.py | 92 ++++++++++++++++++++++++++++++++++++++++ scipy/_lib/meson.build | 1 + 2 files changed, 93 insertions(+) create mode 100644 scipy/_lib/_array_api.py diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py new file mode 100644 index 000000000000..310b7d76d953 --- /dev/null +++ b/scipy/_lib/_array_api.py @@ -0,0 +1,92 @@ +"""Utility functions to use Python Array API compatible libraries. + +For the context about the Array API see: +https://data-apis.org/array-api/latest/purpose_and_scope.html + +The SciPy use case of the Array API is described on the following page: +https://data-apis.org/array-api/latest/use_cases.html#use-case-scipy +""" +import numpy as np +# probably want to vendor it (submodule) +import array_api_compat + +__all__ = ['namespace_from_arrays', 'asarray', 'asarray_namespace'] + + +def namespace_from_arrays(*arrays, single_namespace=True): + # if we cannot get the namespace, np is used + namespaces = set() + for array in arrays: + try: + namespaces.add(array_api_compat.array_namespace(array)) + except TypeError: + namespaces.add(array_api_compat.numpy) + + if single_namespace and len(namespaces) != 1: + raise ValueError( + f"Expected a single common namespace for array inputs, \ + but got: {[n.__name__ for n in namespaces]}" + ) + + (xp,) = namespaces + + return xp + + +def asarray(array, dtype=None, order=None, copy=None, *, xp=None): + """Drop-in replacement for `np.asarray`. + + Memory layout parameter `order` is not exposed in the Array API standard. + `order` is only enforced if the input array implementation + is NumPy based, otherwise `order` is just silently ignored. 
+ + The purpose of this helper is to make it possible to share code for data + container validation without memory copies for both downstream use cases + """ + if xp is None: + xp = namespace_from_arrays(array) + if xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"}: + # Use NumPy API to support order + if copy is True: + array = np.array(array, order=order, dtype=dtype) + else: + array = np.asarray(array, order=order, dtype=dtype) + + # At this point array is a NumPy ndarray. We convert it to an array + # container that is consistent with the input's namespace. + return xp.asarray(array) + else: + return xp.asarray(array, dtype=dtype, copy=copy) + +def asarray_namespace(*arrays): + """Validate and convert arrays to a common namespace. + + Parameters + ---------- + *arrays : sequence of array_like + Arrays to validate and convert. + + Returns + ------- + *arrays : sequence of array_like + Validated and converted arrays to the common namespace. + namespace : module + Common namespace. 
+ + Examples + -------- + >>> import numpy as np + >>> x, y, xp = asarray_namespace([0, 1, 2], np.arange(3)) + >>> xp.__name__ + 'array_api_compat.numpy' + >>> x, y + (array([0, 1, 2]]), array([0, 1, 2])) + + """ + arrays = list(arrays) # probably not good + xp = namespace_from_arrays(*arrays) + + for i, array in enumerate(arrays): + arrays[i] = asarray(array, xp=xp) + + return *arrays, xp diff --git a/scipy/_lib/meson.build b/scipy/_lib/meson.build index 2d3f4fc7a4af..95551d1272b0 100644 --- a/scipy/_lib/meson.build +++ b/scipy/_lib/meson.build @@ -98,6 +98,7 @@ py3.extension_module('messagestream', python_sources = [ '__init__.py', + '_array_api.py', '_bunch.py', '_ccallback.py', '_disjoint_set.py', From 28a633924fe8f76eba43f8e43fec2657c856cbb7 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 25 Apr 2023 16:34:20 +0200 Subject: [PATCH 02/87] ENH: add global config env variable --- scipy/_lib/_array_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 310b7d76d953..24b5c5f28642 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -6,9 +6,15 @@ The SciPy use case of the Array API is described on the following page: https://data-apis.org/array-api/latest/use_cases.html#use-case-scipy """ +import os + import numpy as np # probably want to vendor it (submodule) import array_api_compat +import array_api_compat.numpy + +# SCIPY_ARRAY_API, array_api_dispatch is used by sklearn +USE_ARRAY_API = os.environ.get("array_api_dispatch", False) __all__ = ['namespace_from_arrays', 'asarray', 'asarray_namespace'] From 65aca1eaa944c61e8129ddfa687b38768d5c7bc5 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 25 Apr 2023 16:35:25 +0200 Subject: [PATCH 03/87] TST: add test infra for USE_ARRAY_API --- pytest.ini | 1 + scipy/conftest.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/pytest.ini b/pytest.ini index 04021a5293d4..d390783c6d94 100644 --- a/pytest.ini +++ b/pytest.ini 
@@ -14,3 +14,4 @@ filterwarnings = ignore:.*The distutils.* is deprecated.*:DeprecationWarning ignore:\s*.*numpy.distutils.*:DeprecationWarning ignore:.*The --rsyncdir command line argument.*:DeprecationWarning + ignore:.*The numpy.array_api submodule is still experimental.*:UserWarning diff --git a/scipy/conftest.py b/scipy/conftest.py index f6afc20a3d20..483fe0608f43 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -4,10 +4,13 @@ import warnings import numpy as np +import numpy.array_api import numpy.testing as npt + from scipy._lib._fpumode import get_fpu_mode from scipy._lib._testutils import FPUModeChangeWarning from scipy._lib import _pep440 +from scipy._lib._array_api import USE_ARRAY_API def pytest_configure(config): @@ -93,3 +96,7 @@ def check_fpu_mode(request): warnings.warn("FPU mode changed from {:#x} to {:#x} during " "the test".format(old_mode, new_mode), category=FPUModeChangeWarning, stacklevel=0) + +array_api_compatible = pytest.mark.parametrize( + "xp", [np, *((numpy.array_api,) if USE_ARRAY_API else ())] +) From 912e55a79b9be1fcaac347a1d1ecfab79168c219 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 25 Apr 2023 16:35:52 +0200 Subject: [PATCH 04/87] TST: add some basic test cases --- scipy/_lib/tests/meson.build | 1 + scipy/_lib/tests/test_array_api.py | 40 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 scipy/_lib/tests/test_array_api.py diff --git a/scipy/_lib/tests/meson.build b/scipy/_lib/tests/meson.build index 13da47d3d412..197bb3f28ed4 100644 --- a/scipy/_lib/tests/meson.build +++ b/scipy/_lib/tests/meson.build @@ -12,6 +12,7 @@ python_sources = [ 'test_public_api.py', 'test_tmpdirs.py', 'test_warnings.py', + 'test_array_api.py', ] py3.install_sources( diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py new file mode 100644 index 000000000000..786ba46a4831 --- /dev/null +++ b/scipy/_lib/tests/test_array_api.py @@ -0,0 +1,40 @@ +import numpy as np +from 
numpy.testing import assert_equal +import pytest + +from scipy.conftest import array_api_compatible +from scipy._lib._array_api import ( + USE_ARRAY_API, namespace_from_arrays, asarray, asarray_namespace +) + + +if not USE_ARRAY_API: + pytest.skip( + "Array API test; set environment variable array_api_dispatch=1 to run it", + allow_module_level=True + ) + + +def test_namespace_from_arrays(): + x, y = [0, 1, 2], np.arange(3) + xp = namespace_from_arrays(x, y) + assert xp.__name__ == 'array_api_compat.numpy' + + +@array_api_compatible +def test_asarray(xp): + x, y = asarray([0, 1, 2], xp=xp), asarray(np.arange(3), xp=xp) + ref = np.array([0, 1, 2]) + assert_equal(x, ref) + assert_equal(y, ref) + + +@array_api_compatible +def test_asarray_namespace(xp): + x, y = [0, 1, 2], np.arange(3) + x, y, xp_ = asarray_namespace(x, y) + assert xp_.__name__ == 'array_api_compat.numpy' + ref = np.array([0, 1, 2]) + assert_equal(x, ref) + assert_equal(y, ref) + assert type(x) == type(y) From de56fe746904b82306ac97585f02e500cf53548a Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 25 Apr 2023 17:06:39 +0200 Subject: [PATCH 05/87] FIX: namespace simplification for NumPy like --- scipy/_lib/_array_api.py | 5 +++++ scipy/_lib/tests/test_array_api.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 24b5c5f28642..b314688ed5fe 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -9,6 +9,7 @@ import os import numpy as np +import numpy.array_api # probably want to vendor it (submodule) import array_api_compat import array_api_compat.numpy @@ -28,6 +29,10 @@ def namespace_from_arrays(*arrays, single_namespace=True): except TypeError: namespaces.add(array_api_compat.numpy) + if numpy.array_api in namespaces: + namespaces.remove(numpy.array_api) + namespaces.add(array_api_compat.numpy) + if single_namespace and len(namespaces) != 1: raise ValueError( f"Expected a single common namespace for 
array inputs, \ diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 786ba46a4831..7e20fda46d28 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -31,7 +31,7 @@ def test_asarray(xp): @array_api_compatible def test_asarray_namespace(xp): - x, y = [0, 1, 2], np.arange(3) + x, y = [0, 1, 2], xp.arange(3) x, y, xp_ = asarray_namespace(x, y) assert xp_.__name__ == 'array_api_compat.numpy' ref = np.array([0, 1, 2]) From ca5ff591c2cc2eadf97a376162eab2328951f836 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 26 Apr 2023 18:41:58 +0200 Subject: [PATCH 06/87] MAINT: refactor namespace_from_arrays to array_namespace to ease transition if needed --- scipy/_lib/_array_api.py | 9 +++++---- scipy/_lib/tests/test_array_api.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index b314688ed5fe..e93004f4c3ac 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -17,11 +17,12 @@ # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn USE_ARRAY_API = os.environ.get("array_api_dispatch", False) -__all__ = ['namespace_from_arrays', 'asarray', 'asarray_namespace'] +__all__ = ['array_namespace', 'asarray', 'asarray_namespace'] -def namespace_from_arrays(*arrays, single_namespace=True): +def array_namespace(*arrays, single_namespace=True): # if we cannot get the namespace, np is used + # here until moved upstream namespaces = set() for array in arrays: try: @@ -55,7 +56,7 @@ def asarray(array, dtype=None, order=None, copy=None, *, xp=None): container validation without memory copies for both downstream use cases """ if xp is None: - xp = namespace_from_arrays(array) + xp = array_namespace(array) if xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"}: # Use NumPy API to support order if copy is True: @@ -95,7 +96,7 @@ def asarray_namespace(*arrays): """ arrays = list(arrays) # probably not 
good - xp = namespace_from_arrays(*arrays) + xp = array_namespace(*arrays) for i, array in enumerate(arrays): arrays[i] = asarray(array, xp=xp) diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 7e20fda46d28..00f26c0c0e2b 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -4,7 +4,7 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( - USE_ARRAY_API, namespace_from_arrays, asarray, asarray_namespace + USE_ARRAY_API, array_namespace, asarray, asarray_namespace ) @@ -15,9 +15,9 @@ ) -def test_namespace_from_arrays(): +def test_array_namespace(): x, y = [0, 1, 2], np.arange(3) - xp = namespace_from_arrays(x, y) + xp = array_namespace(x, y) assert xp.__name__ == 'array_api_compat.numpy' From cec8bd2b96da5fb2616f7b6066dcd05009c4f707 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 26 Apr 2023 18:53:24 +0200 Subject: [PATCH 07/87] FIX: consider numpy.array_api as something else than numpy --- scipy/_lib/_array_api.py | 7 ++----- scipy/_lib/tests/test_array_api.py | 9 ++++----- scipy/conftest.py | 4 ++-- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index e93004f4c3ac..f4619a2e9b5c 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -15,7 +15,8 @@ import array_api_compat.numpy # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn -USE_ARRAY_API = os.environ.get("array_api_dispatch", False) +array_api_dispatch = os.environ.get("array_api_dispatch", False) +SCIPY_ARRAY_API = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) __all__ = ['array_namespace', 'asarray', 'asarray_namespace'] @@ -30,10 +31,6 @@ def array_namespace(*arrays, single_namespace=True): except TypeError: namespaces.add(array_api_compat.numpy) - if numpy.array_api in namespaces: - namespaces.remove(numpy.array_api) - namespaces.add(array_api_compat.numpy) - if single_namespace and 
len(namespaces) != 1: raise ValueError( f"Expected a single common namespace for array inputs, \ diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 00f26c0c0e2b..6cdcf0101b88 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -4,11 +4,11 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( - USE_ARRAY_API, array_namespace, asarray, asarray_namespace + SCIPY_ARRAY_API, array_namespace, asarray, asarray_namespace ) -if not USE_ARRAY_API: +if not SCIPY_ARRAY_API: pytest.skip( "Array API test; set environment variable array_api_dispatch=1 to run it", allow_module_level=True @@ -29,9 +29,8 @@ def test_asarray(xp): assert_equal(y, ref) -@array_api_compatible -def test_asarray_namespace(xp): - x, y = [0, 1, 2], xp.arange(3) +def test_asarray_namespace(): + x, y = [0, 1, 2], np.arange(3) x, y, xp_ = asarray_namespace(x, y) assert xp_.__name__ == 'array_api_compat.numpy' ref = np.array([0, 1, 2]) diff --git a/scipy/conftest.py b/scipy/conftest.py index 483fe0608f43..06ce0e44114d 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -10,7 +10,7 @@ from scipy._lib._fpumode import get_fpu_mode from scipy._lib._testutils import FPUModeChangeWarning from scipy._lib import _pep440 -from scipy._lib._array_api import USE_ARRAY_API +from scipy._lib._array_api import SCIPY_ARRAY_API def pytest_configure(config): @@ -98,5 +98,5 @@ def check_fpu_mode(request): category=FPUModeChangeWarning, stacklevel=0) array_api_compatible = pytest.mark.parametrize( - "xp", [np, *((numpy.array_api,) if USE_ARRAY_API else ())] + "xp", [np, *((numpy.array_api,) if SCIPY_ARRAY_API else ())] ) From 94d30442ab03df8be1dfb6c4dd4e665261787d8b Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 26 Apr 2023 18:58:52 +0200 Subject: [PATCH 08/87] ENH: add to_numpy helper --- scipy/_lib/_array_api.py | 17 +++++++++++++++++ scipy/_lib/tests/test_array_api.py | 10 +++++++++- 2 files changed, 26 
insertions(+), 1 deletion(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index f4619a2e9b5c..b5e517529619 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -99,3 +99,20 @@ def asarray_namespace(*arrays): arrays[i] = asarray(array, xp=xp) return *arrays, xp + + +def to_numpy(array, xp): + """Convert `array` into a NumPy ndarray on the CPU. + + This is specially useful to pass arrays to Cython. + """ + xp_name = xp.__name__ + + if xp_name in {"array_api_compat.torch", "torch"}: + return array.cpu().numpy() + elif xp_name == "cupy.array_api": + return array._array.get() + elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover + return array.get() + + return np.asarray(array) diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 6cdcf0101b88..16ba820e6950 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -4,7 +4,8 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( - SCIPY_ARRAY_API, array_namespace, asarray, asarray_namespace + SCIPY_ARRAY_API, array_namespace, asarray, asarray_namespace, + to_numpy ) @@ -37,3 +38,10 @@ def test_asarray_namespace(): assert_equal(x, ref) assert_equal(y, ref) assert type(x) == type(y) + + +@array_api_compatible +def test_to_numpy(xp): + x = xp.asarray([0, 1, 2]) + x = to_numpy(x, xp=xp) + assert isinstance(x, np.ndarray) From 2ef5892f5f6f6d429ec03dd106787f28c374a0ba Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 26 Apr 2023 19:39:14 +0200 Subject: [PATCH 09/87] ENH: use SCIPY_ARRAY_API in array_namespace and fallback to np --- scipy/_lib/_array_api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index b5e517529619..17c9ba33a52e 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -22,6 +22,9 @@ def array_namespace(*arrays, single_namespace=True): + if not SCIPY_ARRAY_API: + return 
np + # if we cannot get the namespace, np is used # here until moved upstream namespaces = set() @@ -67,6 +70,7 @@ def asarray(array, dtype=None, order=None, copy=None, *, xp=None): else: return xp.asarray(array, dtype=dtype, copy=copy) + def asarray_namespace(*arrays): """Validate and convert arrays to a common namespace. From ad770b8cc8228952e7117ba0be5dc715deaf5688 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 26 Apr 2023 19:52:04 +0200 Subject: [PATCH 10/87] ENH: add a compliancy layer --- scipy/_lib/_array_api.py | 10 ++++++++++ scipy/_lib/tests/test_array_api.py | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 17c9ba33a52e..567b90cab0c7 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -21,7 +21,17 @@ __all__ = ['array_namespace', 'asarray', 'asarray_namespace'] +def compliance_scipy(*arrays): + for array in arrays: + if isinstance(array, np.ma.MaskedArray): + raise TypeError("'numpy.ma.MaskedArray' are not supported") + elif isinstance(array, np.matrix): + raise TypeError("'numpy.matrix' are not supported") + + def array_namespace(*arrays, single_namespace=True): + compliance_scipy(*arrays) + if not SCIPY_ARRAY_API: return np diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 16ba820e6950..47606cf8ae99 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -45,3 +45,15 @@ def test_to_numpy(xp): x = xp.asarray([0, 1, 2]) x = to_numpy(x, xp=xp) assert isinstance(x, np.ndarray) + + +@pytest.mark.filterwarnings("ignore: the matrix subclass") +def test_raises(): + msg = "'numpy.ma.MaskedArray' are not supported" + with pytest.raises(TypeError, match=msg): + array_namespace(np.ma.array(1), np.array(1)) + + msg = "'numpy.matrix' are not supported" + with pytest.raises(TypeError, match=msg): + array_namespace(np.array(1), np.matrix(1)) + From 039e9313b9b9148344e51b61c3f7021f5ae87668 Mon 
Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 26 Apr 2023 20:28:47 +0200 Subject: [PATCH 11/87] ENH: dynamic env variable. --- scipy/_lib/_array_api.py | 7 +++++-- scipy/_lib/tests/test_array_api.py | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 567b90cab0c7..fd1d77049ea8 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -14,11 +14,14 @@ import array_api_compat import array_api_compat.numpy +__all__ = ['array_namespace', 'asarray', 'asarray_namespace'] + + # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn array_api_dispatch = os.environ.get("array_api_dispatch", False) SCIPY_ARRAY_API = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) -__all__ = ['array_namespace', 'asarray', 'asarray_namespace'] +_GLOBAL_CONFIG = {"SCIPY_ARRAY_API": SCIPY_ARRAY_API} def compliance_scipy(*arrays): @@ -32,7 +35,7 @@ def compliance_scipy(*arrays): def array_namespace(*arrays, single_namespace=True): compliance_scipy(*arrays) - if not SCIPY_ARRAY_API: + if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: return np # if we cannot get the namespace, np is used diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 47606cf8ae99..07a016fe8e41 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -4,12 +4,12 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( - SCIPY_ARRAY_API, array_namespace, asarray, asarray_namespace, + _GLOBAL_CONFIG, array_namespace, asarray, asarray_namespace, to_numpy ) -if not SCIPY_ARRAY_API: +if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: pytest.skip( "Array API test; set environment variable array_api_dispatch=1 to run it", allow_module_level=True @@ -21,6 +21,11 @@ def test_array_namespace(): xp = array_namespace(x, y) assert xp.__name__ == 'array_api_compat.numpy' + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False + xp = array_namespace(x, y) 
+ assert xp.__name__ == 'numpy' + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True + @array_api_compatible def test_asarray(xp): @@ -39,6 +44,11 @@ def test_asarray_namespace(): assert_equal(y, ref) assert type(x) == type(y) + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False + x, y, xp_ = asarray_namespace(x, y) + assert xp_.__name__ == 'numpy' + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True + @array_api_compatible def test_to_numpy(xp): From bb8e89c6b0b9763c2ac454ea9b231a2a59b23a9f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 27 Apr 2023 14:25:45 +0200 Subject: [PATCH 12/87] ENH: swap order compliance/flag and use directly xp.asarray --- scipy/_lib/_array_api.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index fd1d77049ea8..61f756f332f7 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -33,11 +33,13 @@ def compliance_scipy(*arrays): def array_namespace(*arrays, single_namespace=True): - compliance_scipy(*arrays) if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: + # here we could wrap the namespace if needed return np + compliance_scipy(*arrays) + # if we cannot get the namespace, np is used # here until moved upstream namespaces = set() @@ -55,6 +57,7 @@ def array_namespace(*arrays, single_namespace=True): (xp,) = namespaces + # here we could wrap the namespace if needed return xp @@ -113,7 +116,7 @@ def asarray_namespace(*arrays): xp = array_namespace(*arrays) for i, array in enumerate(arrays): - arrays[i] = asarray(array, xp=xp) + arrays[i] = xp.asarray(array) return *arrays, xp From c4a2c7f499f6e511153650a228d2fff8ccb9430c Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 27 Apr 2023 21:28:21 +0200 Subject: [PATCH 13/87] ENH: only allow Array API arrays --- scipy/_lib/_array_api.py | 29 ++++++----------------------- scipy/_lib/tests/test_array_api.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/scipy/_lib/_array_api.py 
b/scipy/_lib/_array_api.py index 61f756f332f7..03f3dc9b3997 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -9,7 +9,6 @@ import os import numpy as np -import numpy.array_api # probably want to vendor it (submodule) import array_api_compat import array_api_compat.numpy @@ -30,9 +29,11 @@ def compliance_scipy(*arrays): raise TypeError("'numpy.ma.MaskedArray' are not supported") elif isinstance(array, np.matrix): raise TypeError("'numpy.matrix' are not supported") + elif not array_api_compat.is_array_api_obj(array): + raise TypeError("Only support Array API compatible arrays") -def array_namespace(*arrays, single_namespace=True): +def array_namespace(*arrays): if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: # here we could wrap the namespace if needed @@ -40,25 +41,7 @@ def array_namespace(*arrays, single_namespace=True): compliance_scipy(*arrays) - # if we cannot get the namespace, np is used - # here until moved upstream - namespaces = set() - for array in arrays: - try: - namespaces.add(array_api_compat.array_namespace(array)) - except TypeError: - namespaces.add(array_api_compat.numpy) - - if single_namespace and len(namespaces) != 1: - raise ValueError( - f"Expected a single common namespace for array inputs, \ - but got: {[n.__name__ for n in namespaces]}" - ) - - (xp,) = namespaces - - # here we could wrap the namespace if needed - return xp + return array_api_compat.array_namespace(*arrays) def asarray(array, dtype=None, order=None, copy=None, *, xp=None): @@ -112,7 +95,7 @@ def asarray_namespace(*arrays): (array([0, 1, 2]]), array([0, 1, 2])) """ - arrays = list(arrays) # probably not good + arrays = list(arrays) xp = array_namespace(*arrays) for i, array in enumerate(arrays): @@ -124,7 +107,7 @@ def asarray_namespace(*arrays): def to_numpy(array, xp): """Convert `array` into a NumPy ndarray on the CPU. - This is specially useful to pass arrays to Cython. 
+ ONLY FOR TESTING """ xp_name = xp.__name__ diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 07a016fe8e41..87f08c1fcff0 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -17,7 +17,7 @@ def test_array_namespace(): - x, y = [0, 1, 2], np.arange(3) + x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) xp = array_namespace(x, y) assert xp.__name__ == 'array_api_compat.numpy' @@ -36,7 +36,7 @@ def test_asarray(xp): def test_asarray_namespace(): - x, y = [0, 1, 2], np.arange(3) + x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) x, y, xp_ = asarray_namespace(x, y) assert xp_.__name__ == 'array_api_compat.numpy' ref = np.array([0, 1, 2]) @@ -67,3 +67,9 @@ def test_raises(): with pytest.raises(TypeError, match=msg): array_namespace(np.array(1), np.matrix(1)) + msg = "Only support Array API" + with pytest.raises(TypeError, match=msg): + array_namespace([0, 1, 2]) + + with pytest.raises(TypeError, match=msg): + array_namespace(1) From 31a030003e36e1bfd650b92c0442e01a38283677 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 27 Apr 2023 21:48:42 +0200 Subject: [PATCH 14/87] DOC: add comprehensive docstrings --- scipy/_lib/_array_api.py | 50 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 03f3dc9b3997..ed59eb4826ff 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -24,6 +24,13 @@ def compliance_scipy(*arrays): + """Raise exceptions on known-bad subclasses. 
+ + The following subclasses are not supported and raise and error: + - `np.ma.MaskedArray` + - `numpy.matrix` + - Any array-like which is not Array API compatible + """ for array in arrays: if isinstance(array, np.ma.MaskedArray): raise TypeError("'numpy.ma.MaskedArray' are not supported") @@ -34,7 +41,31 @@ def compliance_scipy(*arrays): def array_namespace(*arrays): + """Get the array API compatible namespace for the arrays xs. + + Parameters + ---------- + *arrays : sequence of array_like + Arrays used to infer the common namespace. + + Returns + ------- + namespace : module + Common namespace. + + Notes + ----- + Thin wrapper around `array_api_compat.array_namespace`. + 1. Check for the global switch: SCIPY_ARRAY_API. This can also be accessed + dynamically through ``_GLOBAL_CONFIG['SCIPY_ARRAY_API']``. + 2. `compliance_scipy` raise exceptions on known-bad subclasses. See + it's definition for more details. + + When the global switch is False, it defaults to the `numpy` namespace. + In that case, there is no compliance check. This is a convenience to + ease the adoption. Otherwise, arrays must comply with the new rules. + """ if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: # here we could wrap the namespace if needed return np @@ -52,7 +83,7 @@ def asarray(array, dtype=None, order=None, copy=None, *, xp=None): is NumPy based, otherwise `order` is just silently ignored. The purpose of this helper is to make it possible to share code for data - container validation without memory copies for both downstream use cases + container validation without memory copies for both downstream use cases. """ if xp is None: xp = array_namespace(array) @@ -85,14 +116,27 @@ def asarray_namespace(*arrays): namespace : module Common namespace. + Notes + ----- + This function is meant to be called from each public function in a SciPy + submodule it does the following: + + 1. Check for the global switch: SCIPY_ARRAY_API. 
This can also be accessed + dynamically through ``_GLOBAL_CONFIG['SCIPY_ARRAY_API']``. + 2. `compliance_scipy` raise exceptions on known-bad subclasses. See + it's definition for more details. + 3. Determine the namespace, without doing any coercion of array(-like) + inputs. + 4. Call `xp.asarray` on all array. + Examples -------- >>> import numpy as np - >>> x, y, xp = asarray_namespace([0, 1, 2], np.arange(3)) + >>> x, y, xp = asarray_namespace(np.array([0, 1, 2]), np.array([0, 1, 2])) >>> xp.__name__ 'array_api_compat.numpy' >>> x, y - (array([0, 1, 2]]), array([0, 1, 2])) + (array([0, 1, 2]), array([0, 1, 2])) """ arrays = list(arrays) From 3732e1cc3fac105fb6abb33248eed35f1fcb4127 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 4 May 2023 18:01:42 +0200 Subject: [PATCH 15/87] ENH: add support for Array API in hierarchy --- scipy/cluster/hierarchy.py | 161 ++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 83 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index a4f718d52caa..38a60ca347f8 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,6 +134,7 @@ import numpy as np from . import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance +from scipy._lib._array_api import array_namespace, asarray_namespace, asarray from scipy._lib._disjoint_set import DisjointSet @@ -168,7 +169,7 @@ def _copy_array_if_base_present(a): elif np.issubsctype(a, np.float32): return np.array(a, dtype=np.double) else: - return a + return np.asarray(a) def _copy_arrays_if_base_present(T): @@ -181,28 +182,6 @@ def _copy_arrays_if_base_present(T): return [_copy_array_if_base_present(a) for a in T] -def _randdm(pnts): - """ - Generate a random distance matrix stored in condensed form. - - Parameters - ---------- - pnts : int - The number of points in the distance matrix. Has to be at least 2. 
- - Returns - ------- - D : ndarray - A ``pnts * (pnts - 1) / 2`` sized vector is returned. - """ - if pnts >= 2: - D = np.random.rand(pnts * (pnts - 1) / 2) - else: - raise ValueError("The number of points in the distance matrix " - "must be at least 2.") - return D - - def single(y): """ Perform single/min/nearest linkage on the condensed distance matrix ``y``. @@ -1042,7 +1021,8 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): if method not in _LINKAGE_METHODS: raise ValueError(f"Invalid method: {method}") - y = _convert_to_double(np.asarray(y, order='c')) + xp = array_namespace(y) + y = _convert_to_double(xp.asarray(y, order='c')) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -1052,7 +1032,7 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): raise ValueError("Method '{}' requires the distance metric " "to be Euclidean".format(method)) if y.shape[0] == y.shape[1] and np.allclose(np.diag(y), 0): - if np.all(y >= 0) and np.allclose(y, y.T): + if xp.all(y >= 0) and xp.allclose(y, y.T): _warning('The symmetric non-negative hollow observation ' 'matrix looks suspiciously like an uncondensed ' 'distance matrix') @@ -1060,21 +1040,24 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): else: raise ValueError("`y` must be 1 or 2 dimensional.") - if not np.all(np.isfinite(y)): + if not xp.all(xp.isfinite(y)): raise ValueError("The condensed distance matrix must contain only " "finite values.") n = int(distance.num_obs_y(y)) method_code = _LINKAGE_METHODS[method] + y = np.asarray(y) if method == 'single': result = _hierarchy.mst_single_linkage(y, n) elif method in ['complete', 'average', 'weighted', 'ward']: result = _hierarchy.nn_chain(y, n, method_code) else: result = _hierarchy.fast_linkage(y, n, method_code) + result = xp.asarray(result) if optimal_ordering: + y = xp.asarray(y) return optimal_leaf_ordering(result, y) else: return result @@ -1359,6 +1342,7 @@ def 
cut_tree(Z, n_clusters=None, height=None): [4, 7]]) # random """ + xp = array_namespace(Z) nobs = num_obs_linkage(Z) nodes = _order_cluster_tree(Z) @@ -1366,21 +1350,21 @@ def cut_tree(Z, n_clusters=None, height=None): raise ValueError("At least one of either height or n_clusters " "must be None") elif height is None and n_clusters is None: # return the full cut tree - cols_idx = np.arange(nobs) + cols_idx = xp.arange(nobs) elif height is not None: - heights = np.array([x.dist for x in nodes]) - cols_idx = np.searchsorted(heights, height) + heights = xp.asarray([x.dist for x in nodes]) + cols_idx = xp.searchsorted(heights, height) else: - cols_idx = nobs - np.searchsorted(np.arange(nobs), n_clusters) + cols_idx = nobs - xp.searchsorted(xp.arange(nobs), n_clusters) try: n_cols = len(cols_idx) except TypeError: # scalar n_cols = 1 - cols_idx = np.array([cols_idx]) + cols_idx = xp.asarray([cols_idx]) - groups = np.zeros((n_cols, nobs), dtype=int) - last_group = np.arange(nobs) + groups = xp.zeros((n_cols, nobs), dtype=int) + last_group = xp.arange(nobs) if 0 in cols_idx: groups[0] = last_group @@ -1456,7 +1440,7 @@ def to_tree(Z, rd=False): 9 """ - Z = np.asarray(Z, order='c') + Z = asarray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') # Number of original objects is equal to the number of rows plus 1. @@ -1535,10 +1519,11 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): array([3, 0, 2, 5, 7, 4, 8, 6, 9, 1], dtype=int32) """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z, y) + Z = asarray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') - y = _convert_to_double(np.asarray(y, order='c')) + y = _convert_to_double(xp.asarray(y, order='c', xp=xp)) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -1683,7 +1668,8 @@ def cophenet(Z, Y=None): corners - thus, the distance between these clusters will be larger. 
""" - Z = np.asarray(Z, order='c') + xp = array_namespace(Z, Y) + Z = asarray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') Zs = Z.shape n = Zs[0] + 1 @@ -1694,10 +1680,11 @@ def cophenet(Z, Y=None): Z = _convert_to_double(Z) _hierarchy.cophenetic_distances(Z, zz, int(n)) + zz = xp.asarray(zz) if Y is None: return zz - Y = np.asarray(Y, order='c') + Y = asarray(Y, order='c', xp=xp) distance.is_valid_y(Y, throw=True, name='Y') z = zz.mean() @@ -1707,7 +1694,7 @@ def cophenet(Z, Y=None): numerator = (Yy * Zz) denomA = Yy**2 denomB = Zz**2 - c = numerator.sum() / np.sqrt(denomA.sum() * denomB.sum()) + c = numerator.sum() / xp.sqrt(denomA.sum() * denomB.sum()) return (c, zz) @@ -1766,7 +1753,8 @@ def inconsistent(Z, d=2): [ 6.44583366, 6.76770586, 3. , 1.12682288]]) """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = asarray(Z, order='c', xp=xp) Zs = Z.shape is_valid_linkage(Z, throw=True, name='Z') @@ -1782,6 +1770,7 @@ def inconsistent(Z, d=2): R = np.zeros((n - 1, 4), dtype=np.double) _hierarchy.inconsistent(Z, R, int(n), int(d)) + R = xp.asarray(R) return R @@ -1855,7 +1844,8 @@ def from_mlab_linkage(Z): (MATLAB format uses 1-indexing, whereas SciPy uses 0-indexing). """ - Z = np.asarray(Z, dtype=np.double, order='c') + xp = array_namespace(Z) + Z = asarray(Z, dtype=xp.dtype('float64'), order='c', xp=xp) Zs = Z.shape # If it's empty, return it. @@ -1876,7 +1866,8 @@ def from_mlab_linkage(Z): Zpart[:, 0:2] -= 1.0 CS = np.zeros((Zs[0],), dtype=np.double) _hierarchy.calculate_cluster_sizes(Zpart, CS, int(Zs[0]) + 1) - return np.hstack([Zpart, CS.reshape(Zs[0], 1)]) + res = np.hstack([Zpart, CS.reshape(Zs[0], 1)]) + return xp.asarray(res) def to_mlab_linkage(Z): @@ -1954,7 +1945,7 @@ def to_mlab_linkage(Z): the original linkage matrix has been dropped. 
""" - Z = np.asarray(Z, order='c', dtype=np.double) + Z = asarray(Z, order='c', dtype=np.double) Zs = Z.shape if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): return Z.copy() @@ -2044,7 +2035,7 @@ def is_monotonic(Z): increasing order. """ - Z = np.asarray(Z, order='c') + Z = asarray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') # We expect the i'th value to be greater than its successor. @@ -2138,14 +2129,12 @@ def is_valid_im(R, warning=False, throw=False, name=None): False """ - R = np.asarray(R, order='c') + xp = array_namespace(R) + R = asarray(R, order='c', xp=xp) valid = True name_str = "%r " % name if name else '' try: - if type(R) != np.ndarray: - raise TypeError('Variable %spassed as inconsistency matrix is not ' - 'a numpy array.' % name_str) - if R.dtype != np.double: + if R.dtype != xp.dtype('float64'): raise TypeError('Inconsistency matrix %smust contain doubles ' '(double).' % name_str) if len(R.shape) != 2: @@ -2257,14 +2246,12 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): False """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = asarray(Z, order='c', xp=xp) valid = True name_str = "%r " % name if name else '' try: - if type(Z) != np.ndarray: - raise TypeError('Passed linkage argument %sis not a valid array.' % - name_str) - if Z.dtype != np.double: + if Z.dtype != xp.dtype('float64'): raise TypeError('Linkage matrix %smust contain doubles.' % name_str) if len(Z.shape) != 2: raise ValueError('Linkage matrix %smust have shape=2 (i.e. 
be ' @@ -2363,7 +2350,7 @@ def num_obs_linkage(Z): 12 """ - Z = np.asarray(Z, order='c') + Z = asarray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') return (Z.shape[0] + 1) @@ -2419,8 +2406,9 @@ def correspond(Z, Y): """ is_valid_linkage(Z, throw=True) distance.is_valid_y(Y, throw=True) - Z = np.asarray(Z, order='c') - Y = np.asarray(Y, order='c') + xp = array_namespace(Z, Y) + Z = asarray(Z, order='c', xp=xp) + Y = asarray(Y, order='c', xp=xp) return distance.num_obs_y(Y) == num_obs_linkage(Z) @@ -2575,7 +2563,8 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): all data points to be merged together - so a single cluster is returned. """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z, t) + Z = asarray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 @@ -2589,7 +2578,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): if R is None: R = inconsistent(Z, depth) else: - R = np.asarray(R, order='c') + R = asarray(R, order='c', xp=xp) is_valid_im(R, throw=True, name='R') # Since the C code does not support striding using strides. # The dimensions are used instead. @@ -2608,7 +2597,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): else: raise ValueError('Invalid cluster formation criterion: %s' % str(criterion)) - return T + return xp.asarray(T) def fclusterdata(X, t, criterion='inconsistent', @@ -2694,10 +2683,11 @@ def fclusterdata(X, t, criterion='inconsistent', default settings) is four clusters with three data points each. 
""" - X = np.asarray(X, order='c', dtype=np.double) + xp = array_namespace(X, t) + X = asarray(X, order='c', dtype=xp.dtype('float64')) - if type(X) != np.ndarray or len(X.shape) != 2: - raise TypeError('The observation matrix X must be an n by m numpy ' + if len(X.shape) != 2: + raise TypeError('The observation matrix X must be an n by m ' 'array.') Y = distance.pdist(X, metric=metric) @@ -2705,7 +2695,7 @@ def fclusterdata(X, t, criterion='inconsistent', if R is None: R = inconsistent(Z, d=depth) else: - R = np.asarray(R, order='c') + R = asarray(R, order='c', xp=xp) T = fcluster(Z, criterion=criterion, depth=depth, R=R, t=t) return T @@ -2758,13 +2748,14 @@ def leaves_list(Z): >>> plt.show() """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = asarray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 ML = np.zeros((n,), dtype='i') [Z] = _copy_arrays_if_base_present([Z]) _hierarchy.prelist(Z, ML, int(n)) - return ML + return xp.asarray(ML) # Maps number of leaves to text size. @@ -3294,7 +3285,7 @@ def llf(id): # or results in a crossing, an exception will be thrown. Passing # None orders leaf nodes based on the order they appear in the # pre-order traversal. - Z = np.asarray(Z, order='c') + Z = asarray(Z, order='c') if orientation not in ["top", "left", "bottom", "right"]: raise ValueError("orientation must be one of 'top', 'left', " @@ -3752,13 +3743,9 @@ def is_isomorphic(T1, T2): True """ - T1 = np.asarray(T1, order='c') - T2 = np.asarray(T2, order='c') - - if type(T1) != np.ndarray: - raise TypeError('T1 must be a numpy array.') - if type(T2) != np.ndarray: - raise TypeError('T2 must be a numpy array.') + xp = array_namespace(T1, T2) + T1 = asarray(T1, order='c', xp=xp) + T2 = asarray(T2, order='c', xp=xp) T1S = T1.shape T2S = T2.shape @@ -3859,13 +3846,15 @@ def maxdists(Z): this case. 
""" - Z = np.asarray(Z, order='c', dtype=np.double) + xp = array_namespace(Z) + Z = asarray(Z, order='c', dtype=xp.dtype('float64'), xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 MD = np.zeros((n - 1,)) [Z] = _copy_arrays_if_base_present([Z]) _hierarchy.get_max_dist_for_each_cluster(Z, MD, int(n)) + MD = xp.asarray(MD) return MD @@ -3944,8 +3933,9 @@ def maxinconsts(Z, R): 1.15470054]) """ - Z = np.asarray(Z, order='c') - R = np.asarray(R, order='c') + xp = array_namespace(Z, R) + Z = asarray(Z, order='c', xp=xp) + R = asarray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') @@ -3956,6 +3946,7 @@ def maxinconsts(Z, R): MI = np.zeros((n - 1,)) [Z, R] = _copy_arrays_if_base_present([Z, R]) _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MI, int(n), 3) + MI = xp.asarray(MI) return MI @@ -4036,8 +4027,9 @@ def maxRstat(Z, R, i): 1.15470054]) """ - Z = np.asarray(Z, order='c') - R = np.asarray(R, order='c') + xp = array_namespace(Z, R) + Z = np.asarray(Z, order='c', xp=xp) + R = np.asarray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') if type(i) is not int: @@ -4053,6 +4045,7 @@ def maxRstat(Z, R, i): MR = np.zeros((n - 1,)) [Z, R] = _copy_arrays_if_base_present([Z, R]) _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MR, int(n), i) + MR = xp.asarray(MR) return MR @@ -4158,10 +4151,11 @@ def leaders(Z, T): array([1, 2, 3, 4], dtype=int32) """ - Z = np.asarray(Z, order='c') - T = np.asarray(T, order='c') - if type(T) != np.ndarray or T.dtype != 'i': - raise TypeError('T must be a one-dimensional numpy array of integers.') + xp = array_namespace(Z, T) + Z = asarray(Z, order='c', xp=xp) + T = asarray(T, order='c', xp=xp) + if T.dtype != 'i': + raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') if len(T) != Z.shape[0] + 1: raise ValueError('Mismatch: len(T)!=Z.shape[0] + 1.') @@ -4176,4 
+4170,5 @@ def leaders(Z, T): if s >= 0: raise ValueError(('T is not a valid assignment vector. Error found ' 'when examining linkage node %d (< 2n-1).') % s) + L, M = xp.asarray(L), xp.asarray(M) return (L, M) From 4333c99ce61a5daddef01cb26a70e64f85663341 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 9 May 2023 16:31:21 +0200 Subject: [PATCH 16/87] MAINT: add missing type conversion and asarray --- scipy/cluster/hierarchy.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index 38a60ca347f8..549f4a73d417 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -167,9 +167,9 @@ def _copy_array_if_base_present(a): if a.base is not None: return a.copy() elif np.issubsctype(a, np.float32): - return np.array(a, dtype=np.double) + return a.astype('float64') else: - return np.asarray(a) + return a def _copy_arrays_if_base_present(T): @@ -1555,7 +1555,7 @@ def _convert_to_bool(X): def _convert_to_double(X): if X.dtype != np.double: - X = X.astype(np.double) + X = X.astype('float64') if not X.flags.contiguous: X = X.copy() return X @@ -1679,6 +1679,7 @@ def cophenet(Z, Y=None): # The dimensions are used instead. Z = _convert_to_double(Z) + Z = np.asarray(Z) _hierarchy.cophenetic_distances(Z, zz, int(n)) zz = xp.asarray(zz) if Y is None: @@ -1769,6 +1770,7 @@ def inconsistent(Z, d=2): n = Zs[0] + 1 R = np.zeros((n - 1, 4), dtype=np.double) + Z = xp.asarray(Z) _hierarchy.inconsistent(Z, R, int(n), int(d)) R = xp.asarray(R) return R @@ -1865,6 +1867,7 @@ def from_mlab_linkage(Z): Zpart[:, 0:2] -= 1.0 CS = np.zeros((Zs[0],), dtype=np.double) + Zpart = np.asarray(Zpart) _hierarchy.calculate_cluster_sizes(Zpart, CS, int(Zs[0]) + 1) res = np.hstack([Zpart, CS.reshape(Zs[0], 1)]) return xp.asarray(res) @@ -2574,6 +2577,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): # The dimensions are used instead. 
[Z] = _copy_arrays_if_base_present([Z]) + Z = np.asarray(Z) if criterion == 'inconsistent': if R is None: R = inconsistent(Z, depth) @@ -2583,6 +2587,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): # Since the C code does not support striding using strides. # The dimensions are used instead. [R] = _copy_arrays_if_base_present([R]) + R = np.asarray(R) _hierarchy.cluster_in(Z, R, T, float(t), int(n)) elif criterion == 'distance': _hierarchy.cluster_dist(Z, T, float(t), int(n)) @@ -2754,6 +2759,7 @@ def leaves_list(Z): n = Z.shape[0] + 1 ML = np.zeros((n,), dtype='i') [Z] = _copy_arrays_if_base_present([Z]) + Z = np.asarray(Z) _hierarchy.prelist(Z, ML, int(n)) return xp.asarray(ML) @@ -3853,6 +3859,7 @@ def maxdists(Z): n = Z.shape[0] + 1 MD = np.zeros((n - 1,)) [Z] = _copy_arrays_if_base_present([Z]) + Z = np.asarray(Z) _hierarchy.get_max_dist_for_each_cluster(Z, MD, int(n)) MD = xp.asarray(MD) return MD @@ -3945,6 +3952,8 @@ def maxinconsts(Z, R): "have a different number of rows.") MI = np.zeros((n - 1,)) [Z, R] = _copy_arrays_if_base_present([Z, R]) + Z = np.asarray(Z) + R = np.asarray(R) _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MI, int(n), 3) MI = xp.asarray(MI) return MI @@ -4044,6 +4053,8 @@ def maxRstat(Z, R, i): n = Z.shape[0] + 1 MR = np.zeros((n - 1,)) [Z, R] = _copy_arrays_if_base_present([Z, R]) + Z = np.asarray(Z) + R = np.asarray(R) _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MR, int(n), i) MR = xp.asarray(MR) return MR @@ -4166,6 +4177,8 @@ def leaders(Z, T): M = np.zeros((kk,), dtype='i') n = Z.shape[0] + 1 [Z, T] = _copy_arrays_if_base_present([Z, T]) + Z = np.asarray(Z) + T = np.asarray(T) s = _hierarchy.leaders(Z, T, L, M, int(kk), int(n)) if s >= 0: raise ValueError(('T is not a valid assignment vector. 
Error found ' From 068b4faf1de2ca02fc5865c1f9eb9e97a3959060 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 9 May 2023 17:44:47 +0200 Subject: [PATCH 17/87] ENH: add check_finite to asarray --- scipy/_lib/_array_api.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index ed59eb4826ff..0409ed634189 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -9,6 +9,7 @@ import os import numpy as np +from numpy.core.numerictypes import typecodes # probably want to vendor it (submodule) import array_api_compat import array_api_compat.numpy @@ -38,6 +39,17 @@ def compliance_scipy(*arrays): raise TypeError("'numpy.matrix' are not supported") elif not array_api_compat.is_array_api_obj(array): raise TypeError("Only support Array API compatible arrays") + elif array.dtype is np.dtype('O'): + raise ValueError('object arrays are not supported') + + +def _check_finite(array): + """Check for NaNs or Infs.""" + # same as np.asarray_chkfinite + if array.dtype.char in typecodes['AllFloat'] and not np.isfinite(array).all(): + raise ValueError( + "array must not contain infs or NaNs" + ) def array_namespace(*arrays): @@ -75,7 +87,9 @@ def array_namespace(*arrays): return array_api_compat.array_namespace(*arrays) -def asarray(array, dtype=None, order=None, copy=None, *, xp=None): +def asarray( + array, dtype=None, order=None, copy=None, *, xp=None, check_finite=True +): """Drop-in replacement for `np.asarray`. Memory layout parameter `order` is not exposed in the Array API standard. @@ -96,9 +110,14 @@ def asarray(array, dtype=None, order=None, copy=None, *, xp=None): # At this point array is a NumPy ndarray. We convert it to an array # container that is consistent with the input's namespace. 
- return xp.asarray(array) + array = xp.asarray(array) else: - return xp.asarray(array, dtype=dtype, copy=copy) + array = xp.asarray(array, dtype=dtype, copy=copy) + + if check_finite: + _check_finite(array) + + return array def asarray_namespace(*arrays): From d72de86581f6d8ddf422ee38ce6fd64623a65385 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 9 May 2023 17:44:59 +0200 Subject: [PATCH 18/87] ENH: add array api support to vq --- scipy/cluster/tests/test_vq.py | 10 +++-- scipy/cluster/vq.py | 77 ++++++++++++++++++---------------- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 6696ce51556d..8e26794b5ee2 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -11,6 +11,7 @@ from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten, ClusterError, _krandinit) from scipy.cluster import _vq +from scipy.conftest import array_api_compatible from scipy.sparse._sputils import matrix @@ -71,13 +72,14 @@ class TestWhiten: - def test_whiten(self): - desired = np.array([[5.08738849, 2.97091878], + @array_api_compatible + def test_whiten(self, xp): + desired = xp.asarray([[5.08738849, 2.97091878], [3.19909255, 0.69660580], [4.51041982, 0.02640918], [4.38567074, 0.95120889], [2.32191480, 1.63195503]]) - for tp in np.array, matrix: + for tp in xp.array, matrix: obs = tp([[0.98744510, 0.82766775], [0.62093317, 0.19406729], [0.87545741, 0.00735733], @@ -270,7 +272,7 @@ def test_krandinit(self): else: rng = np.random.RandomState(1234) - init = _krandinit(data, k, rng) + init = _krandinit(data, k, rng, np) orig_cov = np.cov(data, rowvar=0) init_cov = np.cov(init, rowvar=0) assert_allclose(orig_cov, init_cov, atol=1e-2) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 8770fb69fdc8..bc317ebf99df 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -67,8 +67,8 @@ import warnings import numpy as np from collections import deque -from 
scipy._lib._util import _asarray_validated, check_random_state,\ - rng_integers +from scipy._lib._array_api import asarray, array_namespace +from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist from . import _vq @@ -129,7 +129,7 @@ def whiten(obs, check_finite=True): [ 1.75976538, 0.7038557 , 7.21248917]]) """ - obs = _asarray_validated(obs, check_finite=check_finite) + obs = asarray(obs, check_finite=check_finite) std_dev = obs.std(axis=0) zero_std_mask = std_dev == 0 if zero_std_mask.any(): @@ -198,14 +198,15 @@ def vq(obs, code_book, check_finite=True): (array([1, 1, 0],'i'), array([ 0.43588989, 0.73484692, 0.83066239])) """ - obs = _asarray_validated(obs, check_finite=check_finite) - code_book = _asarray_validated(code_book, check_finite=check_finite) - ct = np.common_type(obs, code_book) + xp = array_namespace(obs, code_book) + obs = asarray(obs, xp=xp, check_finite=check_finite) + code_book = asarray(code_book, xp=xp, check_finite=check_finite) + ct = xp.common_type(obs, code_book) c_obs = obs.astype(ct, copy=False) c_code_book = code_book.astype(ct, copy=False) - if np.issubdtype(ct, np.float64) or np.issubdtype(ct, np.float32): + if xp.issubdtype(ct, xp.float64) or xp.issubdtype(ct, xp.float32): return _vq.vq(c_obs, c_code_book) return py_vq(obs, code_book, check_finite=False) @@ -247,23 +248,24 @@ def py_vq(obs, code_book, check_finite=True): It is about 20 times slower than the C version. 
""" - obs = _asarray_validated(obs, check_finite=check_finite) - code_book = _asarray_validated(code_book, check_finite=check_finite) + xp = array_namespace(obs, code_book) + obs = asarray(obs, xp=xp, check_finite=check_finite) + code_book = asarray(code_book, xp=xp, check_finite=check_finite) if obs.ndim != code_book.ndim: raise ValueError("Observation and code_book should have the same rank") if obs.ndim == 1: - obs = obs[:, np.newaxis] - code_book = code_book[:, np.newaxis] + obs = obs[:, xp.newaxis] + code_book = code_book[:, xp.newaxis] dist = cdist(obs, code_book) code = dist.argmin(axis=1) - min_dist = dist[np.arange(len(code)), code] + min_dist = dist[xp.arange(len(code)), code] return code, min_dist -def _kmeans(obs, guess, thresh=1e-5): +def _kmeans(obs, guess, thresh=1e-5, xp=None): """ "raw" version of k-means. Returns @@ -295,9 +297,9 @@ def _kmeans(obs, guess, thresh=1e-5): [ 0.73333333, 1.13333333]]), 0.40563916697728591) """ - - code_book = np.asarray(guess) - diff = np.inf + xp = np if xp is None else xp + code_book = guess + diff = xp.inf prev_avg_dists = deque([diff], maxlen=2) while diff > thresh: # compute membership and distances between obs and code_book @@ -307,7 +309,7 @@ def _kmeans(obs, guess, thresh=1e-5): code_book, has_members = _vq.update_cluster_means(obs, obs_code, code_book.shape[0]) code_book = code_book[has_members] - diff = np.absolute(prev_avg_dists[0] - prev_avg_dists[1]) + diff = xp.absolute(prev_avg_dists[0] - prev_avg_dists[1]) return code_book, prev_avg_dists[1] @@ -447,17 +449,18 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, >>> plt.show() """ - obs = _asarray_validated(obs, check_finite=check_finite) + xp = array_namespace(obs, k_or_guess) + obs = asarray(obs, xp=xp, check_finite=check_finite) if iter < 1: raise ValueError("iter must be at least 1, got %s" % iter) # Determine whether a count (scalar) or an initial guess (array) was passed. 
- if not np.isscalar(k_or_guess): - guess = _asarray_validated(k_or_guess, check_finite=check_finite) + if not xp.isscalar(k_or_guess): + guess = asarray(k_or_guess, xp=xp, check_finite=check_finite) if guess.size < 1: raise ValueError("Asked for 0 clusters. Initial book was %s" % guess) - return _kmeans(obs, guess, thresh=thresh) + return _kmeans(obs, guess, thresh=thresh, xp=xp) # k_or_guess is a scalar, now verify that it's an integer k = int(k_or_guess) @@ -469,11 +472,11 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, rng = check_random_state(seed) # initialize best distance value to a large value - best_dist = np.inf + best_dist = xp.inf for i in range(iter): # the initial code book is randomly selected from observations guess = _kpoints(obs, k, rng) - book, dist = _kmeans(obs, guess, thresh=thresh) + book, dist = _kmeans(obs, guess, thresh=thresh, xp=xp) if dist < best_dist: best_book = book best_dist = dist @@ -504,7 +507,7 @@ def _kpoints(data, k, rng): return data[idx] -def _krandinit(data, k, rng): +def _krandinit(data, k, rng, xp): """Returns k samples of a random variable whose parameters depend on data. 
More precisely, it returns k observations sampled from a Gaussian random @@ -530,28 +533,28 @@ def _krandinit(data, k, rng): mu = data.mean(axis=0) if data.ndim == 1: - cov = np.cov(data) + cov = xp.cov(data) x = rng.standard_normal(size=k) - x *= np.sqrt(cov) + x *= xp.sqrt(cov) elif data.shape[1] > data.shape[0]: # initialize when the covariance matrix is rank deficient - _, s, vh = np.linalg.svd(data - mu, full_matrices=False) + _, s, vh = xp.linalg.svd(data - mu, full_matrices=False) x = rng.standard_normal(size=(k, s.size)) - sVh = s[:, None] * vh / np.sqrt(data.shape[0] - 1) + sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - 1) x = x.dot(sVh) else: - cov = np.atleast_2d(np.cov(data, rowvar=False)) + cov = np.atleast_2d(xp.cov(data, rowvar=False)) # k rows, d cols (one row = one obs) # Generate k sample of a random variable ~ Gaussian(mu, cov) x = rng.standard_normal(size=(k, mu.size)) - x = x.dot(np.linalg.cholesky(cov).T) + x = x.dot(xp.linalg.cholesky(cov).T) x += mu return x -def _kpp(data, k, rng): +def _kpp(data, k, rng, xp): """ Picks k points in the data based on the kmeans++ method. 
Parameters @@ -578,7 +581,7 @@ def _kpp(data, k, rng): """ dims = data.shape[1] if len(data.shape) > 1 else 1 - init = np.ndarray((k, dims)) + init = xp.empty((k, dims)) for i in range(k): if i == 0: @@ -589,6 +592,7 @@ def _kpp(data, k, rng): probs = D2/D2.sum() cumprobs = probs.cumsum() r = rng.uniform() + cumprobs = np.asarray(cumprobs) init[i, :] = data[np.searchsorted(cumprobs, r)] return init @@ -745,7 +749,8 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', except KeyError as e: raise ValueError(f"Unknown missing method {missing!r}") from e - data = _asarray_validated(data, check_finite=check_finite) + xp = array_namespace(data, k) + data = asarray(data, xp=xp, check_finite=check_finite) if data.ndim == 1: d = 1 elif data.ndim == 2: @@ -757,8 +762,8 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', raise ValueError("Empty input is not supported.") # If k is not a single value, it should be compatible with data's shape - if minit == 'matrix' or not np.isscalar(k): - code_book = np.array(k, copy=True) + if minit == 'matrix' or not xp.isscalar(k): + code_book = asarray(k, xp=xp, copy=True) if data.ndim != code_book.ndim: raise ValueError("k array doesn't match data rank") nc = len(code_book) @@ -779,7 +784,7 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', raise ValueError(f"Unknown init method {minit!r}") from e else: rng = check_random_state(seed) - code_book = init_meth(data, k, rng) + code_book = init_meth(data, k, rng, xp) for i in range(iter): # Compute the nearest neighbor for each obs using the current code book From d748ed1382bb288e1efc19f99d5936cf4b839f7e Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 10 May 2023 15:32:54 +0200 Subject: [PATCH 19/87] BUG: fix missing xp in private functions --- scipy/cluster/vq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index bc317ebf99df..09b9f0a1bc2d 100644 --- a/scipy/cluster/vq.py +++ 
b/scipy/cluster/vq.py @@ -475,7 +475,7 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, best_dist = xp.inf for i in range(iter): # the initial code book is randomly selected from observations - guess = _kpoints(obs, k, rng) + guess = _kpoints(obs, k, rng, xp) book, dist = _kmeans(obs, guess, thresh=thresh, xp=xp) if dist < best_dist: best_book = book @@ -483,7 +483,7 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, return best_book, best_dist -def _kpoints(data, k, rng): +def _kpoints(data, k, rng, xp): """Pick k points at random in data (one row = one observation). Parameters From 0a1aa679c9742ce853a153353e35e675b6be5fa0 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 10 May 2023 15:33:58 +0200 Subject: [PATCH 20/87] BUG: fix asarray mixup --- scipy/cluster/hierarchy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index 549f4a73d417..d9ca0aa62d39 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1523,7 +1523,7 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): Z = asarray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') - y = _convert_to_double(xp.asarray(y, order='c', xp=xp)) + y = _convert_to_double(asarray(y, order='c', xp=xp)) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -4037,8 +4037,8 @@ def maxRstat(Z, R, i): """ xp = array_namespace(Z, R) - Z = np.asarray(Z, order='c', xp=xp) - R = np.asarray(R, order='c', xp=xp) + Z = asarray(Z, order='c', xp=xp) + R = asarray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') if type(i) is not int: From 2a4f2f7a9169576b0754505cd8fd9a05351e780f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 10 May 2023 15:53:29 +0200 Subject: [PATCH 21/87] TST: revert array_api_compatible in test_vq --- scipy/cluster/tests/test_vq.py | 8 +++----- 1 file changed, 3 insertions(+), 5 
deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 8e26794b5ee2..ae29a0955b9a 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -11,7 +11,6 @@ from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten, ClusterError, _krandinit) from scipy.cluster import _vq -from scipy.conftest import array_api_compatible from scipy.sparse._sputils import matrix @@ -72,14 +71,13 @@ class TestWhiten: - @array_api_compatible - def test_whiten(self, xp): - desired = xp.asarray([[5.08738849, 2.97091878], + def test_whiten(self): + desired = np.array([[5.08738849, 2.97091878], [3.19909255, 0.69660580], [4.51041982, 0.02640918], [4.38567074, 0.95120889], [2.32191480, 1.63195503]]) - for tp in xp.array, matrix: + for tp in np.array, matrix: obs = tp([[0.98744510, 0.82766775], [0.62093317, 0.19406729], [0.87545741, 0.00735733], From 633648b53a4cc21089099918fb2b83de5b57c84f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 10 May 2023 15:56:11 +0200 Subject: [PATCH 22/87] MAINT: unused import --- scipy/cluster/hierarchy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index d9ca0aa62d39..ca3ceaf5831a 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,7 +134,7 @@ import numpy as np from . 
import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance -from scipy._lib._array_api import array_namespace, asarray_namespace, asarray +from scipy._lib._array_api import array_namespace, asarray from scipy._lib._disjoint_set import DisjointSet From 735101a161a93d10de3f3874967d493501a763e6 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 11 May 2023 15:00:16 +0200 Subject: [PATCH 23/87] MAINT: refactor asarray to as_xparray --- scipy/_lib/_array_api.py | 4 +- scipy/_lib/tests/test_array_api.py | 4 +- scipy/cluster/hierarchy.py | 60 +++++++++++++++--------------- scipy/cluster/vq.py | 20 +++++----- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 0409ed634189..9cd46110f11f 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -14,7 +14,7 @@ import array_api_compat import array_api_compat.numpy -__all__ = ['array_namespace', 'asarray', 'asarray_namespace'] +__all__ = ['array_namespace', 'as_xparray', 'asarray_namespace'] # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn @@ -87,7 +87,7 @@ def array_namespace(*arrays): return array_api_compat.array_namespace(*arrays) -def asarray( +def as_xparray( array, dtype=None, order=None, copy=None, *, xp=None, check_finite=True ): """Drop-in replacement for `np.asarray`. 
diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 87f08c1fcff0..5d9cfae75aba 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -4,7 +4,7 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( - _GLOBAL_CONFIG, array_namespace, asarray, asarray_namespace, + _GLOBAL_CONFIG, array_namespace, as_xparray, asarray_namespace, to_numpy ) @@ -29,7 +29,7 @@ def test_array_namespace(): @array_api_compatible def test_asarray(xp): - x, y = asarray([0, 1, 2], xp=xp), asarray(np.arange(3), xp=xp) + x, y = as_xparray([0, 1, 2], xp=xp), as_xparray(np.arange(3), xp=xp) ref = np.array([0, 1, 2]) assert_equal(x, ref) assert_equal(y, ref) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index ca3ceaf5831a..be835f407f34 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,7 +134,7 @@ import numpy as np from . import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance -from scipy._lib._array_api import array_namespace, asarray +from scipy._lib._array_api import array_namespace, as_xparray from scipy._lib._disjoint_set import DisjointSet @@ -1440,7 +1440,7 @@ def to_tree(Z, rd=False): 9 """ - Z = asarray(Z, order='c') + Z = as_xparray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') # Number of original objects is equal to the number of rows plus 1. 
@@ -1520,10 +1520,10 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): """ xp = array_namespace(Z, y) - Z = asarray(Z, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') - y = _convert_to_double(asarray(y, order='c', xp=xp)) + y = _convert_to_double(as_xparray(y, order='c', xp=xp)) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -1669,7 +1669,7 @@ def cophenet(Z, Y=None): """ xp = array_namespace(Z, Y) - Z = asarray(Z, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') Zs = Z.shape n = Zs[0] + 1 @@ -1685,7 +1685,7 @@ def cophenet(Z, Y=None): if Y is None: return zz - Y = asarray(Y, order='c', xp=xp) + Y = as_xparray(Y, order='c', xp=xp) distance.is_valid_y(Y, throw=True, name='Y') z = zz.mean() @@ -1755,7 +1755,7 @@ def inconsistent(Z, d=2): """ xp = array_namespace(Z) - Z = asarray(Z, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) Zs = Z.shape is_valid_linkage(Z, throw=True, name='Z') @@ -1847,7 +1847,7 @@ def from_mlab_linkage(Z): """ xp = array_namespace(Z) - Z = asarray(Z, dtype=xp.dtype('float64'), order='c', xp=xp) + Z = as_xparray(Z, dtype=xp.dtype('float64'), order='c', xp=xp) Zs = Z.shape # If it's empty, return it. @@ -1948,7 +1948,7 @@ def to_mlab_linkage(Z): the original linkage matrix has been dropped. """ - Z = asarray(Z, order='c', dtype=np.double) + Z = as_xparray(Z, order='c', dtype=np.double) Zs = Z.shape if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): return Z.copy() @@ -2038,7 +2038,7 @@ def is_monotonic(Z): increasing order. """ - Z = asarray(Z, order='c') + Z = as_xparray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') # We expect the i'th value to be greater than its successor. 
@@ -2133,7 +2133,7 @@ def is_valid_im(R, warning=False, throw=False, name=None): """ xp = array_namespace(R) - R = asarray(R, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) valid = True name_str = "%r " % name if name else '' try: @@ -2250,7 +2250,7 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): """ xp = array_namespace(Z) - Z = asarray(Z, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) valid = True name_str = "%r " % name if name else '' try: @@ -2353,7 +2353,7 @@ def num_obs_linkage(Z): 12 """ - Z = asarray(Z, order='c') + Z = as_xparray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') return (Z.shape[0] + 1) @@ -2410,8 +2410,8 @@ def correspond(Z, Y): is_valid_linkage(Z, throw=True) distance.is_valid_y(Y, throw=True) xp = array_namespace(Z, Y) - Z = asarray(Z, order='c', xp=xp) - Y = asarray(Y, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) + Y = as_xparray(Y, order='c', xp=xp) return distance.num_obs_y(Y) == num_obs_linkage(Z) @@ -2567,7 +2567,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): """ xp = array_namespace(Z, t) - Z = asarray(Z, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 @@ -2582,7 +2582,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): if R is None: R = inconsistent(Z, depth) else: - R = asarray(R, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) is_valid_im(R, throw=True, name='R') # Since the C code does not support striding using strides. # The dimensions are used instead. 
@@ -2689,7 +2689,7 @@ def fclusterdata(X, t, criterion='inconsistent', """ xp = array_namespace(X, t) - X = asarray(X, order='c', dtype=xp.dtype('float64')) + X = as_xparray(X, order='c', dtype=xp.dtype('float64')) if len(X.shape) != 2: raise TypeError('The observation matrix X must be an n by m ' @@ -2700,7 +2700,7 @@ def fclusterdata(X, t, criterion='inconsistent', if R is None: R = inconsistent(Z, d=depth) else: - R = asarray(R, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) T = fcluster(Z, criterion=criterion, depth=depth, R=R, t=t) return T @@ -2754,7 +2754,7 @@ def leaves_list(Z): """ xp = array_namespace(Z) - Z = asarray(Z, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 ML = np.zeros((n,), dtype='i') @@ -3291,7 +3291,7 @@ def llf(id): # or results in a crossing, an exception will be thrown. Passing # None orders leaf nodes based on the order they appear in the # pre-order traversal. - Z = asarray(Z, order='c') + Z = as_xparray(Z, order='c') if orientation not in ["top", "left", "bottom", "right"]: raise ValueError("orientation must be one of 'top', 'left', " @@ -3750,8 +3750,8 @@ def is_isomorphic(T1, T2): """ xp = array_namespace(T1, T2) - T1 = asarray(T1, order='c', xp=xp) - T2 = asarray(T2, order='c', xp=xp) + T1 = as_xparray(T1, order='c', xp=xp) + T2 = as_xparray(T2, order='c', xp=xp) T1S = T1.shape T2S = T2.shape @@ -3853,7 +3853,7 @@ def maxdists(Z): """ xp = array_namespace(Z) - Z = asarray(Z, order='c', dtype=xp.dtype('float64'), xp=xp) + Z = as_xparray(Z, order='c', dtype=xp.dtype('float64'), xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 @@ -3941,8 +3941,8 @@ def maxinconsts(Z, R): """ xp = array_namespace(Z, R) - Z = asarray(Z, order='c', xp=xp) - R = asarray(R, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') @@ -4037,8 
+4037,8 @@ def maxRstat(Z, R, i): """ xp = array_namespace(Z, R) - Z = asarray(Z, order='c', xp=xp) - R = asarray(R, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') if type(i) is not int: @@ -4163,8 +4163,8 @@ def leaders(Z, T): """ xp = array_namespace(Z, T) - Z = asarray(Z, order='c', xp=xp) - T = asarray(T, order='c', xp=xp) + Z = as_xparray(Z, order='c', xp=xp) + T = as_xparray(T, order='c', xp=xp) if T.dtype != 'i': raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 09b9f0a1bc2d..d7fcae189cb9 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -67,7 +67,7 @@ import warnings import numpy as np from collections import deque -from scipy._lib._array_api import asarray, array_namespace +from scipy._lib._array_api import as_xparray, array_namespace from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist @@ -129,7 +129,7 @@ def whiten(obs, check_finite=True): [ 1.75976538, 0.7038557 , 7.21248917]]) """ - obs = asarray(obs, check_finite=check_finite) + obs = as_xparray(obs, check_finite=check_finite) std_dev = obs.std(axis=0) zero_std_mask = std_dev == 0 if zero_std_mask.any(): @@ -199,8 +199,8 @@ def vq(obs, code_book, check_finite=True): """ xp = array_namespace(obs, code_book) - obs = asarray(obs, xp=xp, check_finite=check_finite) - code_book = asarray(code_book, xp=xp, check_finite=check_finite) + obs = as_xparray(obs, xp=xp, check_finite=check_finite) + code_book = as_xparray(code_book, xp=xp, check_finite=check_finite) ct = xp.common_type(obs, code_book) c_obs = obs.astype(ct, copy=False) @@ -249,8 +249,8 @@ def py_vq(obs, code_book, check_finite=True): """ xp = array_namespace(obs, code_book) - obs = asarray(obs, xp=xp, check_finite=check_finite) - code_book = 
asarray(code_book, xp=xp, check_finite=check_finite) + obs = as_xparray(obs, xp=xp, check_finite=check_finite) + code_book = as_xparray(code_book, xp=xp, check_finite=check_finite) if obs.ndim != code_book.ndim: raise ValueError("Observation and code_book should have the same rank") @@ -450,13 +450,13 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, """ xp = array_namespace(obs, k_or_guess) - obs = asarray(obs, xp=xp, check_finite=check_finite) + obs = as_xparray(obs, xp=xp, check_finite=check_finite) if iter < 1: raise ValueError("iter must be at least 1, got %s" % iter) # Determine whether a count (scalar) or an initial guess (array) was passed. if not xp.isscalar(k_or_guess): - guess = asarray(k_or_guess, xp=xp, check_finite=check_finite) + guess = as_xparray(k_or_guess, xp=xp, check_finite=check_finite) if guess.size < 1: raise ValueError("Asked for 0 clusters. Initial book was %s" % guess) @@ -750,7 +750,7 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', raise ValueError(f"Unknown missing method {missing!r}") from e xp = array_namespace(data, k) - data = asarray(data, xp=xp, check_finite=check_finite) + data = as_xparray(data, xp=xp, check_finite=check_finite) if data.ndim == 1: d = 1 elif data.ndim == 2: @@ -763,7 +763,7 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', # If k is not a single value, it should be compatible with data's shape if minit == 'matrix' or not xp.isscalar(k): - code_book = asarray(k, xp=xp, copy=True) + code_book = as_xparray(k, xp=xp, copy=True) if data.ndim != code_book.ndim: raise ValueError("k array doesn't match data rank") nc = len(code_book) From eb79de00c1b3a2e4a6e7419e00b4c7f21a9c9a35 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 11 May 2023 16:18:57 +0200 Subject: [PATCH 24/87] MAINT: refactor asarray_namespace to as_xparray_namespace --- scipy/_lib/_array_api.py | 6 +++--- scipy/_lib/tests/test_array_api.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) 
diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 9cd46110f11f..b93d028927ee 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -14,7 +14,7 @@ import array_api_compat import array_api_compat.numpy -__all__ = ['array_namespace', 'as_xparray', 'asarray_namespace'] +__all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn @@ -120,7 +120,7 @@ def as_xparray( return array -def asarray_namespace(*arrays): +def as_xparray_namespace(*arrays): """Validate and convert arrays to a common namespace. Parameters @@ -151,7 +151,7 @@ def asarray_namespace(*arrays): Examples -------- >>> import numpy as np - >>> x, y, xp = asarray_namespace(np.array([0, 1, 2]), np.array([0, 1, 2])) + >>> x, y, xp = as_xparray_namespace(np.array([0, 1, 2]), np.array([0, 1, 2])) >>> xp.__name__ 'array_api_compat.numpy' >>> x, y diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 5d9cfae75aba..f27c9c6cb0e2 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -4,7 +4,7 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( - _GLOBAL_CONFIG, array_namespace, as_xparray, asarray_namespace, + _GLOBAL_CONFIG, array_namespace, as_xparray, as_xparray_namespace, to_numpy ) @@ -35,9 +35,9 @@ def test_asarray(xp): assert_equal(y, ref) -def test_asarray_namespace(): +def test_as_xparray_namespace(): x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) - x, y, xp_ = asarray_namespace(x, y) + x, y, xp_ = as_xparray_namespace(x, y) assert xp_.__name__ == 'array_api_compat.numpy' ref = np.array([0, 1, 2]) assert_equal(x, ref) @@ -45,7 +45,7 @@ def test_asarray_namespace(): assert type(x) == type(y) _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False - x, y, xp_ = asarray_namespace(x, y) + x, y, xp_ = as_xparray_namespace(x, y) assert xp_.__name__ == 'numpy' _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True From 
b4b9f32faf3c8e6402241c3d5221a68dbe2cb63e Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 11 May 2023 19:25:05 +0200 Subject: [PATCH 25/87] BUG: fix isfinite check --- scipy/_lib/_array_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index b93d028927ee..fff7234938f6 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -43,10 +43,10 @@ def compliance_scipy(*arrays): raise ValueError('object arrays are not supported') -def _check_finite(array): +def _check_finite(array, xp): """Check for NaNs or Infs.""" # same as np.asarray_chkfinite - if array.dtype.char in typecodes['AllFloat'] and not np.isfinite(array).all(): + if array.dtype.char in typecodes['AllFloat'] and not xp.isfinite(array).all(): raise ValueError( "array must not contain infs or NaNs" ) @@ -115,7 +115,7 @@ def as_xparray( array = xp.asarray(array, dtype=dtype, copy=copy) if check_finite: - _check_finite(array) + _check_finite(array, xp) return array From 1623d0e540a5c3acccf6df7cddb822302e26d8a6 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 11 May 2023 19:53:21 +0200 Subject: [PATCH 26/87] CI: add pytorch cpu workflow --- .github/workflows/array_api.yml | 86 +++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 .github/workflows/array_api.yml diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml new file mode 100644 index 000000000000..c453ba179b35 --- /dev/null +++ b/.github/workflows/array_api.yml @@ -0,0 +1,86 @@ +name: Linux Array API + +on: + push: + branches: + - maintenance/** + pull_request: + branches: + - main + - maintenance/** + +permissions: + contents: read # to fetch code (actions/checkout) + +env: + CCACHE_DIR: "${{ github.workspace }}/.ccache" + INSTALLDIR: "build-install" + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + pytorch_cpu: + name: 
Meson build + # if: "github.repository == 'scipy/scipy' || github.repository == ''" + runs-on: ubuntu-22.04 + strategy: + matrix: + python-version: [3.11'] + maintenance-branch: + - ${{ contains(github.ref, 'maintenance/') || contains(github.base_ref, 'maintenance/') }} + exclude: + - maintenance-branch: true + + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' # not using a path to also cache pytorch + + - name: Install Ubuntu dependencies + run: | + sudo apt-get update + sudo apt-get install -y libopenblas-dev libatlas-base-dev liblapack-dev gfortran libgmp-dev libmpfr-dev libsuitesparse-dev ccache libmpc-dev + + - name: Install Python packages + run: | + python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch + # Packages for Array API testing + python -m pip install array-api-compat + python -m pip install torch --index-url https://download.pytorch.org/whl/cpu + + - name: Prepare compiler cache + id: prep-ccache + shell: bash + run: | + mkdir -p "${CCACHE_DIR}" + echo "dir=$CCACHE_DIR" >> $GITHUB_OUTPUT + NOW=$(date -u +"%F-%T") + echo "timestamp=${NOW}" >> $GITHUB_OUTPUT + + - name: Setup compiler cache + uses: actions/cache@v3 + id: cache-ccache + with: + path: ${{ steps.prep-ccache.outputs.dir }} + key: ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux-${{ steps.prep-ccache.outputs.timestamp }} + restore-keys: | + ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux- + + - name: Setup build and install scipy + run: | + python dev.py build + + - name: Test SciPy + run: | + export OMP_NUM_THREADS=2 + export SCIPY_USE_PROPACK=1 + export SCIPY_ARRAY_API=True + python dev.py --no-build test -s cluster -- --durations 10 --timeout=60 From abdb9b231d0fc9159a1bd7aceb39fcee01682e14 Mon Sep 17 00:00:00 
2001 From: Pamphile Roy Date: Thu, 11 May 2023 20:07:05 +0200 Subject: [PATCH 27/87] ENH: handle None case --- scipy/_lib/_array_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index fff7234938f6..6945dbaf806a 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -82,6 +82,8 @@ def array_namespace(*arrays): # here we could wrap the namespace if needed return np + arrays = [array for array in arrays if array is not None] + compliance_scipy(*arrays) return array_api_compat.array_namespace(*arrays) From c432d88ca5b338ee6083b805ee8e703bd8acbddf Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 09:41:24 +0200 Subject: [PATCH 28/87] CI: change name workflow --- .github/workflows/array_api.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index c453ba179b35..42fa69a7bc1f 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -1,4 +1,4 @@ -name: Linux Array API +name: Array API on: push: @@ -22,12 +22,12 @@ concurrency: jobs: pytorch_cpu: - name: Meson build + name: Linux PyTorch CPU # if: "github.repository == 'scipy/scipy' || github.repository == ''" runs-on: ubuntu-22.04 strategy: matrix: - python-version: [3.11'] + python-version: ['3.11'] maintenance-branch: - ${{ contains(github.ref, 'maintenance/') || contains(github.base_ref, 'maintenance/') }} exclude: From a0b06cfaeea2c80b13d00c982141793768faa66c Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 15:59:00 +0200 Subject: [PATCH 29/87] ENH: first draft to have --array-api-backend as option in dev.py --- dev.py | 8 ++++++++ scipy/conftest.py | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/dev.py b/dev.py index 099b9dc36d2d..7b642e6bb638 100644 --- a/dev.py +++ b/dev.py @@ -716,6 +716,11 @@ class Test(Task): ['--parallel', 
'-j'], default=1, metavar='N_JOBS', help="Number of parallel jobs for testing" ) + array_api_backend = Option( + ['--array-api-backend', '-b'], default=None, metavar='ARRAY_BACKEND', + multiple=True, + help="List of Array API backends ('numpy', 'pytorch', 'numpy.array_api')" + ) # Argument can't have `help=`; used to consume all of `-- arg1 arg2 arg3` pytest_args = Argument( ['pytest_args'], nargs=-1, metavar='PYTEST-ARGS', required=False @@ -756,6 +761,9 @@ def scipy_tests(cls, args, pytest_args): else: tests = None + if args.array_api_backend is not None: + os.environ['SCIPY_ARRAY_API'] = json.dumps(list(args.array_api_backend)) + runner, version, mod_path = get_test_runner(PROJECT_MODULE) # FIXME: changing CWD is not a good practice with working_dir(dirs.site): diff --git a/scipy/conftest.py b/scipy/conftest.py index 06ce0e44114d..6c6d250fb65e 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -1,11 +1,12 @@ # Pytest customization +import json import os -import pytest import warnings import numpy as np import numpy.array_api import numpy.testing as npt +import pytest from scipy._lib._fpumode import get_fpu_mode from scipy._lib._testutils import FPUModeChangeWarning @@ -97,6 +98,34 @@ def check_fpu_mode(request): "the test".format(old_mode, new_mode), category=FPUModeChangeWarning, stacklevel=0) -array_api_compatible = pytest.mark.parametrize( - "xp", [np, *((numpy.array_api,) if SCIPY_ARRAY_API else ())] -) + +# Array API backend handling +array_api_backends = (np,) + +if SCIPY_ARRAY_API: + # only use PyTorch CPU on GitHub actions + array_api_available_backends = { + 'numpy': np, 'numpy.array_api': numpy.array_api, + } + + try: + import torch + array_api_available_backends.update({'pytorch': torch}) + except ImportError: + pass + + array_api_backends = (np, numpy.array_api) + + if isinstance(SCIPY_ARRAY_API, str): + SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) + + try: + array_api_backends = [ + array_api_available_backends[backend] + for backend in 
SCIPY_ARRAY_API + ] + except KeyError: + msg = f"'--array-api-backend' must be in {array_api_available_backends}" + raise ValueError(msg) + +array_api_compatible = pytest.mark.parametrize("xp", array_api_backends) From 37c4c0fa65ffd0d41b174471fd3a3764fd4ecd92 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 17:26:06 +0200 Subject: [PATCH 30/87] MAINT: add 'all' and use mechanism in CI --- .github/workflows/array_api.yml | 3 +-- dev.py | 5 ++++- scipy/conftest.py | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 42fa69a7bc1f..a2feb865384e 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -82,5 +82,4 @@ jobs: run: | export OMP_NUM_THREADS=2 export SCIPY_USE_PROPACK=1 - export SCIPY_ARRAY_API=True - python dev.py --no-build test -s cluster -- --durations 10 --timeout=60 + python dev.py --no-build test --array-api-backend pytorch -s cluster -- --durations 10 --timeout=60 diff --git a/dev.py b/dev.py index 7b642e6bb638..3a93ffc8220d 100644 --- a/dev.py +++ b/dev.py @@ -685,6 +685,7 @@ class Test(Task): $ python dev.py test -t scipy.optimize.tests.test_minimize_constrained $ python dev.py test -s cluster -m full --durations 20 $ python dev.py test -s stats -- --tb=line # `--` passes next args to pytest + $ python dev.py test -b numpy -b pytorch -s cluster ``` """ # noqa: E501 ctx = CONTEXT @@ -719,7 +720,9 @@ class Test(Task): array_api_backend = Option( ['--array-api-backend', '-b'], default=None, metavar='ARRAY_BACKEND', multiple=True, - help="List of Array API backends ('numpy', 'pytorch', 'numpy.array_api')" + help=( + "Array API backend ('all', 'numpy', 'pytorch', 'numpy.array_api')." 
+ ) ) # Argument can't have `help=`; used to consume all of `-- arg1 arg2 arg3` pytest_args = Argument( diff --git a/scipy/conftest.py b/scipy/conftest.py index 6c6d250fb65e..58f4a1b50e9d 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -103,7 +103,6 @@ def check_fpu_mode(request): array_api_backends = (np,) if SCIPY_ARRAY_API: - # only use PyTorch CPU on GitHub actions array_api_available_backends = { 'numpy': np, 'numpy.array_api': numpy.array_api, } @@ -119,13 +118,16 @@ def check_fpu_mode(request): if isinstance(SCIPY_ARRAY_API, str): SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) - try: - array_api_backends = [ - array_api_available_backends[backend] - for backend in SCIPY_ARRAY_API - ] - except KeyError: - msg = f"'--array-api-backend' must be in {array_api_available_backends}" - raise ValueError(msg) + if 'all' in SCIPY_ARRAY_API: + array_api_backends = array_api_available_backends.values() + else: + try: + array_api_backends = [ + array_api_available_backends[backend] + for backend in SCIPY_ARRAY_API + ] + except KeyError: + msg = f"'--array-api-backend' must be in {array_api_available_backends}" + raise ValueError(msg) array_api_compatible = pytest.mark.parametrize("xp", array_api_backends) From afc5fdcd525f94d6ccfa9d9789e8e5ce212b62f6 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 17:31:59 +0200 Subject: [PATCH 31/87] BUG: fix tuple empty check --- dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev.py b/dev.py index 3a93ffc8220d..e93c0188d3c2 100644 --- a/dev.py +++ b/dev.py @@ -764,7 +764,7 @@ def scipy_tests(cls, args, pytest_args): else: tests = None - if args.array_api_backend is not None: + if len(args.array_api_backend) != 0: os.environ['SCIPY_ARRAY_API'] = json.dumps(list(args.array_api_backend)) runner, version, mod_path = get_test_runner(PROJECT_MODULE) From f900a4dad15fb0299032aa64e78c7fe3f21a459a Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 17:37:23 +0200 Subject: 
[PATCH 32/87] BUG: fix boolean case --- scipy/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/conftest.py b/scipy/conftest.py index 58f4a1b50e9d..0ac39f36cbe7 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -115,7 +115,7 @@ def check_fpu_mode(request): array_api_backends = (np, numpy.array_api) - if isinstance(SCIPY_ARRAY_API, str): + if SCIPY_ARRAY_API.lower() != "true": SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) if 'all' in SCIPY_ARRAY_API: From d6b568ad0d8fa3a5f31083e1c182d6f90c0fc39e Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 19:56:11 +0200 Subject: [PATCH 33/87] TST: add skip_if_array_api marker --- scipy/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scipy/conftest.py b/scipy/conftest.py index 0ac39f36cbe7..3d2601f5ef18 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -131,3 +131,8 @@ def check_fpu_mode(request): raise ValueError(msg) array_api_compatible = pytest.mark.parametrize("xp", array_api_backends) + +skip_if_array_api = pytest.mark.skipif( + SCIPY_ARRAY_API, + reason="do not run with Array API on", +) From 560b5919bd5d313f2a4280c0a3e5c78368f27239 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 19:56:51 +0200 Subject: [PATCH 34/87] BUG: fix kmeans and kmeans2 scalar handling --- scipy/cluster/vq.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index d7fcae189cb9..3b2aa6bd604b 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -451,20 +451,21 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, """ xp = array_namespace(obs, k_or_guess) obs = as_xparray(obs, xp=xp, check_finite=check_finite) + guess = as_xparray(k_or_guess, xp=xp, check_finite=check_finite) if iter < 1: raise ValueError("iter must be at least 1, got %s" % iter) # Determine whether a count (scalar) or an initial guess (array) was passed. 
- if not xp.isscalar(k_or_guess): - guess = as_xparray(k_or_guess, xp=xp, check_finite=check_finite) + if guess.size > 1: if guess.size < 1: raise ValueError("Asked for 0 clusters. Initial book was %s" % guess) - return _kmeans(obs, guess, thresh=thresh, xp=xp) + elif guess.size > 1: + return _kmeans(obs, guess, thresh=thresh, xp=xp) # k_or_guess is a scalar, now verify that it's an integer - k = int(k_or_guess) - if k != k_or_guess: + k = int(guess) + if k != guess: raise ValueError("If k_or_guess is a scalar, it must be an integer.") if k < 1: raise ValueError("Asked for %d clusters." % k) @@ -751,6 +752,7 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', xp = array_namespace(data, k) data = as_xparray(data, xp=xp, check_finite=check_finite) + code_book = as_xparray(k, xp=xp, copy=True) if data.ndim == 1: d = 1 elif data.ndim == 2: @@ -758,24 +760,23 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', else: raise ValueError("Input of rank > 2 is not supported.") - if data.size < 1: + if data.size < 1 or code_book.size < 1: raise ValueError("Empty input is not supported.") # If k is not a single value, it should be compatible with data's shape - if minit == 'matrix' or not xp.isscalar(k): - code_book = as_xparray(k, xp=xp, copy=True) + if minit == 'matrix' or code_book.size > 1: if data.ndim != code_book.ndim: raise ValueError("k array doesn't match data rank") nc = len(code_book) if data.ndim > 1 and code_book.shape[1] != d: raise ValueError("k array doesn't match data dimension") else: - nc = int(k) + nc = int(code_book) if nc < 1: raise ValueError("Cannot ask kmeans2 for %d clusters" - " (k was %s)" % (nc, k)) - elif nc != k: + " (k was %s)" % (nc, code_book)) + elif nc != code_book: warnings.warn("k was not an integer, was converted.") try: @@ -784,7 +785,7 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', raise ValueError(f"Unknown init method {minit!r}") from e else: rng = check_random_state(seed) - code_book = 
init_meth(data, k, rng, xp) + code_book = init_meth(data, code_book, rng, xp) for i in range(iter): # Compute the nearest neighbor for each obs using the current code book From 9ea70e62b9df2fd500899e805f17e9fd50cca3ea Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 20:20:02 +0200 Subject: [PATCH 35/87] TST: adjust test_vq --- scipy/cluster/tests/test_vq.py | 60 ++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index ae29a0955b9a..bdcee068ffe2 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -11,7 +11,9 @@ from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten, ClusterError, _krandinit) from scipy.cluster import _vq +from scipy.conftest import skip_if_array_api from scipy.sparse._sputils import matrix +from scipy._lib._array_api import SCIPY_ARRAY_API TESTDATA_2D = np.array([ @@ -77,7 +79,8 @@ def test_whiten(self): [4.51041982, 0.02640918], [4.38567074, 0.95120889], [2.32191480, 1.63195503]]) - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: obs = tp([[0.98744510, 0.82766775], [0.62093317, 0.19406729], [0.87545741, 0.00735733], @@ -89,7 +92,8 @@ def test_whiten_zero_std(self): desired = np.array([[0., 1.0, 2.86666544], [0., 1.0, 1.32460034], [0., 1.0, 3.74382172]]) - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: obs = tp([[0., 1., 0.74109533], [0., 1., 0.34243798], [0., 1., 0.96785929]]) @@ -100,7 +104,8 @@ def test_whiten_zero_std(self): assert_(issubclass(w[-1].category, RuntimeWarning)) def test_whiten_not_finite(self): - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: for bad_value in np.nan, np.inf, -np.inf: obs = tp([[0.98744510, bad_value], [0.62093317, 0.19406729], @@ -113,13 +118,15 @@ def 
test_whiten_not_finite(self): class TestVq: def test_py_vq(self): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: label1 = py_vq(tp(X), tp(initc))[0] assert_array_equal(label1, LABEL1) def test_vq(self): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: label1, dist = _vq.vq(tp(X), tp(initc)) assert_array_equal(label1, LABEL1) tlabel1, tdist = vq(tp(X), tp(initc)) @@ -185,12 +192,13 @@ def test_large_features(self): data[:x.shape[0]] = x data[x.shape[0]:] = y - kmeans(data, 2) + kmeans(data, np.asarray(2)) def test_kmeans_simple(self): np.random.seed(54321) initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: code1 = kmeans(tp(X), tp(initc), iter=1)[0] assert_array_almost_equal(code1, CODET2) @@ -213,7 +221,8 @@ def test_kmeans_lost_cluster(self): def test_kmeans2_simple(self): np.random.seed(12345678) initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + for tp in arrays: code1 = kmeans2(tp(X), tp(initc), iter=1)[0] code2 = kmeans2(tp(X), tp(initc), iter=2)[0] @@ -232,30 +241,31 @@ def test_kmeans2_rank1(self): def test_kmeans2_rank1_2(self): data = TESTDATA_2D data1 = data[:, 0] - kmeans2(data1, 2, iter=1) + kmeans2(data1, np.asarray(2), iter=1) def test_kmeans2_high_dim(self): # test kmeans2 when the number of dimensions exceeds the number # of input points data = TESTDATA_2D data = data.reshape((20, 20))[:10] - kmeans2(data, 2) + kmeans2(data, np.asarray(2)) def test_kmeans2_init(self): np.random.seed(12345) data = TESTDATA_2D + k = np.asarray(3) - kmeans2(data, 3, minit='points') - kmeans2(data[:, :1], 3, minit='points') # special case 
(1-D) + kmeans2(data, k, minit='points') + kmeans2(data[:, :1], k, minit='points') # special case (1-D) - kmeans2(data, 3, minit='++') - kmeans2(data[:, :1], 3, minit='++') # special case (1-D) + kmeans2(data, k, minit='++') + kmeans2(data[:, :1], k, minit='++') # special case (1-D) # minit='random' can give warnings, filter those with suppress_warnings() as sup: sup.filter(message="One of the clusters is empty. Re-run.") - kmeans2(data, 3, minit='random') - kmeans2(data[:, :1], 3, minit='random') # special case (1-D) + kmeans2(data, k, minit='random') + kmeans2(data[:, :1], k, minit='random') # special case (1-D) @pytest.mark.skipif(sys.platform == 'win32', reason='Fails with MemoryError in Wine.') @@ -277,8 +287,9 @@ def test_krandinit(self): def test_kmeans2_empty(self): # Regression test for gh-1032. - assert_raises(ValueError, kmeans2, [], 2) + assert_raises(ValueError, kmeans2, np.asarray([]), np.asarray(2)) + @skip_if_array_api def test_kmeans_0k(self): # Regression test for gh-1073: fail when k arg is 0. 
assert_raises(ValueError, kmeans, X, 0) @@ -288,7 +299,7 @@ def test_kmeans_0k(self): def test_kmeans_large_thres(self): # Regression test for gh-1774 x = np.array([1, 2, 3, 4, 10], dtype=float) - res = kmeans(x, 1, thresh=1e16) + res = kmeans(x, np.asarray(1), thresh=1e16) assert_allclose(res[0], np.array([4.])) assert_allclose(res[1], 2.3999999999999999) @@ -297,7 +308,7 @@ def test_kmeans2_kpp_low_dim(self): prev_res = np.array([[-1.95266667, 0.898], [-3.153375, 3.3945]]) np.random.seed(42) - res, _ = kmeans2(TESTDATA_2D, 2, minit='++') + res, _ = kmeans2(TESTDATA_2D, np.asarray(2), minit='++') assert_allclose(res, prev_res) def test_kmeans2_kpp_high_dim(self): @@ -311,7 +322,7 @@ def test_kmeans2_kpp_high_dim(self): np.random.multivariate_normal(centers[0], np.eye(n_dim), size=size), np.random.multivariate_normal(centers[1], np.eye(n_dim), size=size) ]) - res, _ = kmeans2(data, 2, minit='++') + res, _ = kmeans2(data, np.asarray(2), minit='++') assert_array_almost_equal(res, centers, decimal=0) def test_kmeans_diff_convergence(self): @@ -321,13 +332,12 @@ def test_kmeans_diff_convergence(self): assert_allclose(res[0], np.array([-0.4, 8.])) assert_allclose(res[1], 1.0666666666666667) + @skip_if_array_api def test_kmeans_and_kmeans2_random_seed(self): - seed_list = [1234, np.random.RandomState(1234)] - - # check that np.random.Generator can be used (numpy >= 1.17) - if hasattr(np.random, 'default_rng'): - seed_list.append(np.random.default_rng(1234)) + seed_list = [ + 1234, np.random.RandomState(1234), np.random.default_rng(1234) + ] for seed in seed_list: # test for kmeans From e54c36ddb7dd30994a943402c6b851362fab150c Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 20:24:59 +0200 Subject: [PATCH 36/87] TST: adjust test_hierarchy --- scipy/cluster/hierarchy.py | 4 +-- scipy/cluster/tests/test_hierarchy.py | 37 +++++++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/scipy/cluster/hierarchy.py 
b/scipy/cluster/hierarchy.py index be835f407f34..58975d785464 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -2566,7 +2566,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): all data points to be merged together - so a single cluster is returned. """ - xp = array_namespace(Z, t) + xp = array_namespace(Z) Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') @@ -2688,7 +2688,7 @@ def fclusterdata(X, t, criterion='inconsistent', default settings) is four clusters with three data points each. """ - xp = array_namespace(X, t) + xp = array_namespace(X) X = as_xparray(X, order='c', dtype=xp.dtype('float64')) if len(X.shape) != 2: diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index 6398b6781dc2..f49951975fb1 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -47,6 +47,7 @@ _order_cluster_tree, _hierarchy, _LINKAGE_METHODS) from scipy.spatial.distance import pdist from scipy.cluster._hierarchy import Heap +from scipy.conftest import skip_if_array_api from . import hierarchy_test_data @@ -180,6 +181,7 @@ def test_linkage_cophenet_tdist_Z_Y(self): class TestMLabLinkageConversion: + @skip_if_array_api def test_mlab_linkage_conversion_empty(self): # Tests from/to_mlab_linkage on empty linkage array. X = np.asarray([]) @@ -189,7 +191,7 @@ def test_mlab_linkage_conversion_empty(self): def test_mlab_linkage_conversion_single_row(self): # Tests from/to_mlab_linkage on linkage array with single row. 
Z = np.asarray([[0., 1., 3., 2.]]) - Zm = [[1, 2, 3]] + Zm = np.asarray([[1, 2, 3]]) assert_equal(from_mlab_linkage(Zm), Z) assert_equal(to_mlab_linkage(Z), Zm) @@ -270,6 +272,7 @@ def test_leaders_single(self): class TestIsIsomorphic: + @skip_if_array_api def test_is_isomorphic_1(self): # Tests is_isomorphic on test case #1 (one flat cluster, different labellings) a = [1, 1, 1] @@ -279,35 +282,35 @@ def test_is_isomorphic_1(self): def test_is_isomorphic_2(self): # Tests is_isomorphic on test case #2 (two flat clusters, different labelings) - a = [1, 7, 1] - b = [2, 3, 2] + a = np.asarray([1, 7, 1]) + b = np.asarray([2, 3, 2]) assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) def test_is_isomorphic_3(self): # Tests is_isomorphic on test case #3 (no flat clusters) - a = [] - b = [] + a = np.asarray([]) + b = np.asarray([]) assert_(is_isomorphic(a, b)) def test_is_isomorphic_4A(self): # Tests is_isomorphic on test case #4A (3 flat clusters, different labelings, isomorphic) - a = [1, 2, 3] - b = [1, 3, 2] + a = np.asarray([1, 2, 3]) + b = np.asarray([1, 3, 2]) assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) def test_is_isomorphic_4B(self): # Tests is_isomorphic on test case #4B (3 flat clusters, different labelings, nonisomorphic) - a = [1, 2, 3, 3] - b = [1, 3, 2, 3] + a = np.asarray([1, 2, 3, 3]) + b = np.asarray([1, 3, 2, 3]) assert_(is_isomorphic(a, b) is False) assert_(is_isomorphic(b, a) is False) def test_is_isomorphic_4C(self): # Tests is_isomorphic on test case #4C (3 flat clusters, different labelings, isomorphic) - a = [7, 2, 3] - b = [6, 3, 2] + a = np.asarray([7, 2, 3]) + b = np.asarray([6, 3, 2]) assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) @@ -326,7 +329,9 @@ def test_is_isomorphic_6(self): def test_is_isomorphic_7(self): # Regression test for gh-6271 - assert_(not is_isomorphic([1, 2, 3], [1, 1, 1])) + a = np.asarray([1, 2, 3]) + b = np.asarray([1, 1, 1]) + assert_(not is_isomorphic(a, b)) def 
help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0): for k in range(3): @@ -1018,18 +1023,18 @@ def calculate_maximum_inconsistencies(Z, R, k=3): def test_unsupported_uncondensed_distance_matrix_linkage_warning(): - assert_warns(ClusterWarning, linkage, [[0, 1], [1, 0]]) + assert_warns(ClusterWarning, linkage, np.asarray([[0, 1], [1, 0]])) def test_euclidean_linkage_value_error(): for method in scipy.cluster.hierarchy._EUCLIDEAN_METHODS: - assert_raises(ValueError, linkage, [[1, 1], [1, 1]], + assert_raises(ValueError, linkage, np.asarray([[1, 1], [1, 1]]), method=method, metric='cityblock') def test_2x2_linkage(): - Z1 = linkage([1], method='single', metric='euclidean') - Z2 = linkage([[0, 1], [0, 0]], method='single', metric='euclidean') + Z1 = linkage(np.asarray([1]), method='single', metric='euclidean') + Z2 = linkage(np.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean') assert_allclose(Z1, Z2) From 9da0cd95c57f08aa96325eb4a9e5c6a5187946af Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 20:31:07 +0200 Subject: [PATCH 37/87] MAINT: dtype('float64') to float64 --- scipy/cluster/hierarchy.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index 58975d785464..f8bd82e836fe 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1847,7 +1847,7 @@ def from_mlab_linkage(Z): """ xp = array_namespace(Z) - Z = as_xparray(Z, dtype=xp.dtype('float64'), order='c', xp=xp) + Z = as_xparray(Z, dtype=xp.float64, order='c', xp=xp) Zs = Z.shape # If it's empty, return it. @@ -2137,7 +2137,7 @@ def is_valid_im(R, warning=False, throw=False, name=None): valid = True name_str = "%r " % name if name else '' try: - if R.dtype != xp.dtype('float64'): + if R.dtype != xp.float64: raise TypeError('Inconsistency matrix %smust contain doubles ' '(double).' 
% name_str) if len(R.shape) != 2: @@ -2254,7 +2254,7 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): valid = True name_str = "%r " % name if name else '' try: - if Z.dtype != xp.dtype('float64'): + if Z.dtype != xp.float64: raise TypeError('Linkage matrix %smust contain doubles.' % name_str) if len(Z.shape) != 2: raise ValueError('Linkage matrix %smust have shape=2 (i.e. be ' @@ -2689,7 +2689,7 @@ def fclusterdata(X, t, criterion='inconsistent', """ xp = array_namespace(X) - X = as_xparray(X, order='c', dtype=xp.dtype('float64')) + X = as_xparray(X, order='c', dtype=xp.float64) if len(X.shape) != 2: raise TypeError('The observation matrix X must be an n by m ' @@ -3853,7 +3853,7 @@ def maxdists(Z): """ xp = array_namespace(Z) - Z = as_xparray(Z, order='c', dtype=xp.dtype('float64'), xp=xp) + Z = as_xparray(Z, order='c', dtype=xp.float64, xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 From 85c7045b5b34fe0f12fb883fc3fce1a3ba3de81d Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 20:57:09 +0200 Subject: [PATCH 38/87] BUG: fix isfinite for torch --- scipy/_lib/_array_api.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 6945dbaf806a..f9ab8a37fd1f 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -45,11 +45,12 @@ def compliance_scipy(*arrays): def _check_finite(array, xp): """Check for NaNs or Infs.""" - # same as np.asarray_chkfinite - if array.dtype.char in typecodes['AllFloat'] and not xp.isfinite(array).all(): - raise ValueError( - "array must not contain infs or NaNs" - ) + msg = "array must not contain infs or NaNs" + try: + if not xp.isfinite(array).all(): + raise ValueError(msg) + except TypeError: + raise ValueError(msg) def array_namespace(*arrays): From 8bc56abbe1b8d1d2ce1752412d803e2eca5d8ce1 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 12 May 2023 21:10:20 +0200 Subject: [PATCH 
39/87] TST: start to adjust test_vq for PyTorch --- scipy/cluster/tests/test_vq.py | 44 +++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index bdcee068ffe2..9b3e29a0b465 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -11,7 +11,7 @@ from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten, ClusterError, _krandinit) from scipy.cluster import _vq -from scipy.conftest import skip_if_array_api +from scipy.conftest import skip_if_array_api, array_api_compatible from scipy.sparse._sputils import matrix from scipy._lib._array_api import SCIPY_ARRAY_API @@ -73,13 +73,14 @@ class TestWhiten: - def test_whiten(self): - desired = np.array([[5.08738849, 2.97091878], + @array_api_compatible + def test_whiten(self, xp): + desired = xp.asarray([[5.08738849, 2.97091878], [3.19909255, 0.69660580], [4.51041982, 0.02640918], [4.38567074, 0.95120889], [2.32191480, 1.63195503]]) - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] for tp in arrays: obs = tp([[0.98744510, 0.82766775], [0.62093317, 0.19406729], @@ -88,11 +89,12 @@ def test_whiten(self): [0.45067590, 0.45464607]]) assert_allclose(whiten(obs), desired, rtol=1e-5) - def test_whiten_zero_std(self): + @array_api_compatible + def test_whiten_zero_std(self, xp): desired = np.array([[0., 1.0, 2.86666544], [0., 1.0, 1.32460034], [0., 1.0, 3.74382172]]) - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] for tp in arrays: obs = tp([[0., 1., 0.74109533], [0., 1., 0.34243798], @@ -103,10 +105,11 @@ def test_whiten_zero_std(self): assert_equal(len(w), 1) assert_(issubclass(w[-1].category, RuntimeWarning)) - def test_whiten_not_finite(self): - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + 
@array_api_compatible + def test_whiten_not_finite(self, xp): + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] for tp in arrays: - for bad_value in np.nan, np.inf, -np.inf: + for bad_value in xp.nan, xp.inf, -xp.inf: obs = tp([[0.98744510, bad_value], [0.62093317, 0.19406729], [0.87545741, 0.00735733], @@ -116,17 +119,18 @@ def test_whiten_not_finite(self): class TestVq: - def test_py_vq(self): + @array_api_compatible + def test_py_vq(self, xp): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] for tp in arrays: label1 = py_vq(tp(X), tp(initc))[0] assert_array_equal(label1, LABEL1) - def test_vq(self): + @skip_if_array_api + def test_vq(self, xp): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] - for tp in arrays: + for tp in [np.asarray, matrix]: label1, dist = _vq.vq(tp(X), tp(initc)) assert_array_equal(label1, LABEL1) tlabel1, tdist = vq(tp(X), tp(initc)) @@ -194,10 +198,11 @@ def test_large_features(self): kmeans(data, np.asarray(2)) - def test_kmeans_simple(self): + @array_api_compatible + def test_kmeans_simple(self, xp): np.random.seed(54321) initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] for tp in arrays: code1 = kmeans(tp(X), tp(initc), iter=1)[0] assert_array_almost_equal(code1, CODET2) @@ -218,10 +223,11 @@ def test_kmeans_lost_cluster(self): assert_raises(ClusterError, kmeans2, data, initk, missing='raise') - def test_kmeans2_simple(self): + @array_api_compatible + def test_kmeans2_simple(self, xp): np.random.seed(12345678) - initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - arrays = [np.array] if SCIPY_ARRAY_API else [np.array, matrix] + initc = xp.asarray(np.concatenate([[X[0]], [X[1]], [X[2]]])) + 
arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] for tp in arrays: code1 = kmeans2(tp(X), tp(initc), iter=1)[0] code2 = kmeans2(tp(X), tp(initc), iter=2)[0] From 2eb779197578a69440ce5f2e25df48a1d33ec63f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 12:11:45 +0200 Subject: [PATCH 40/87] ENH: add size helper --- scipy/_lib/_array_api.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index f9ab8a37fd1f..95199391c252 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -6,6 +6,7 @@ The SciPy use case of the Array API is described on the following page: https://data-apis.org/array-api/latest/use_cases.html#use-case-scipy """ +import math import os import numpy as np @@ -185,3 +186,21 @@ def to_numpy(array, xp): return array.get() return np.asarray(array) + + +def size(x): + """Return the total number of elements of `x`. + + From Scikit-Learn. + + Parameters + ---------- + x : array + Array instance from NumPy or an Array API compatible library. + + Returns + ------- + out : int + Total number of elements. 
+ """ + return math.prod(x.shape) From 45962470cae0156f589e497f0871bf0125e599aa Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 12:14:15 +0200 Subject: [PATCH 41/87] MAINT: refactor np.size usages --- scipy/cluster/vq.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 3b2aa6bd604b..000ae819978c 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -67,7 +67,7 @@ import warnings import numpy as np from collections import deque -from scipy._lib._array_api import as_xparray, array_namespace +from scipy._lib._array_api import as_xparray, array_namespace, size from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist @@ -456,11 +456,11 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, raise ValueError("iter must be at least 1, got %s" % iter) # Determine whether a count (scalar) or an initial guess (array) was passed. - if guess.size > 1: - if guess.size < 1: + if size(guess) > 1: + if size(guess) < 1: raise ValueError("Asked for 0 clusters. 
Initial book was %s" % guess) - elif guess.size > 1: + elif size(guess) > 1: return _kmeans(obs, guess, thresh=thresh, xp=xp) # k_or_guess is a scalar, now verify that it's an integer @@ -540,7 +540,7 @@ def _krandinit(data, k, rng, xp): elif data.shape[1] > data.shape[0]: # initialize when the covariance matrix is rank deficient _, s, vh = xp.linalg.svd(data - mu, full_matrices=False) - x = rng.standard_normal(size=(k, s.size)) + x = rng.standard_normal(size=(k, size(s))) sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - 1) x = x.dot(sVh) else: @@ -548,7 +548,7 @@ def _krandinit(data, k, rng, xp): # k rows, d cols (one row = one obs) # Generate k sample of a random variable ~ Gaussian(mu, cov) - x = rng.standard_normal(size=(k, mu.size)) + x = rng.standard_normal(size=(k, size(mu))) x = x.dot(xp.linalg.cholesky(cov).T) x += mu @@ -760,11 +760,11 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', else: raise ValueError("Input of rank > 2 is not supported.") - if data.size < 1 or code_book.size < 1: + if size(data) < 1 or size(code_book) < 1: raise ValueError("Empty input is not supported.") # If k is not a single value, it should be compatible with data's shape - if minit == 'matrix' or code_book.size > 1: + if minit == 'matrix' or size(code_book) > 1: if data.ndim != code_book.ndim: raise ValueError("k array doesn't match data rank") nc = len(code_book) From 67ac6e03a0bbb80c50815a23f914087dcdcb8635 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 12:36:04 +0200 Subject: [PATCH 42/87] MAINT: size from array_api_compat --- scipy/_lib/_array_api.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 95199391c252..906a2a687d00 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -10,9 +10,9 @@ import os import numpy as np -from numpy.core.numerictypes import typecodes # probably want to vendor it (submodule) import array_api_compat 
+from array_api_compat import size import array_api_compat.numpy __all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] @@ -186,21 +186,3 @@ def to_numpy(array, xp): return array.get() return np.asarray(array) - - -def size(x): - """Return the total number of elements of `x`. - - From Scikit-Learn. - - Parameters - ---------- - x : array - Array instance from NumPy or an Array API compatible library. - - Returns - ------- - out : int - Total number of elements. - """ - return math.prod(x.shape) From 52e982d17f5baa927d30807214115ec10f52db8a Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 12:45:26 +0200 Subject: [PATCH 43/87] MAINT: add isdtype from scikit-learn --- scipy/_lib/_array_api.py | 47 ++++++++++++++++++++++++++++++++++++++++ scipy/cluster/vq.py | 19 ++++++++++------ 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 906a2a687d00..dd0c5d6d1eaa 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -186,3 +186,50 @@ def to_numpy(array, xp): return array.get() return np.asarray(array) + + +def isdtype(dtype, kind, *, xp): + """Returns a boolean indicating whether a provided dtype is of type "kind". + + Included in the v2022.12 of the Array API spec. 
+ https://data-apis.org/array-api/latest/API_specification/generated/array_api.isdtype.html + """ + if isinstance(kind, tuple): + return any(_isdtype_single(dtype, k, xp=xp) for k in kind) + else: + return _isdtype_single(dtype, kind, xp=xp) + + +def _isdtype_single(dtype, kind, *, xp): + if isinstance(kind, str): + if kind == "bool": + return dtype == xp.bool + elif kind == "signed integer": + return dtype in {xp.int8, xp.int16, xp.int32, xp.int64} + elif kind == "unsigned integer": + return dtype in {xp.uint8, xp.uint16, xp.uint32, xp.uint64} + elif kind == "integral": + return any( + _isdtype_single(dtype, k, xp=xp) + for k in ("signed integer", "unsigned integer") + ) + elif kind == "real floating": + return dtype in {xp.float32, xp.float64} + elif kind == "complex floating": + # Some name spaces do not have complex, such as cupy.array_api + # and numpy.array_api + complex_dtypes = set() + if hasattr(xp, "complex64"): + complex_dtypes.add(xp.complex64) + if hasattr(xp, "complex128"): + complex_dtypes.add(xp.complex128) + return dtype in complex_dtypes + elif kind == "numeric": + return any( + _isdtype_single(dtype, k, xp=xp) + for k in ("integral", "real floating", "complex floating") + ) + else: + raise ValueError(f"Unrecognized data type kind: {kind!r}") + else: + return dtype == kind diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 000ae819978c..2210a1e9731b 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -67,7 +67,7 @@ import warnings import numpy as np from collections import deque -from scipy._lib._array_api import as_xparray, array_namespace, size +from scipy._lib._array_api import as_xparray, array_namespace, size, isdtype from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist @@ -201,13 +201,16 @@ def vq(obs, code_book, check_finite=True): xp = array_namespace(obs, code_book) obs = as_xparray(obs, xp=xp, check_finite=check_finite) code_book = as_xparray(code_book, xp=xp, 
check_finite=check_finite) - ct = xp.common_type(obs, code_book) + ct = xp.result_type(obs, code_book) - c_obs = obs.astype(ct, copy=False) - c_code_book = code_book.astype(ct, copy=False) + c_obs = xp.astype(obs, ct, copy=False) + c_code_book = xp.astype(code_book, ct, copy=False) - if xp.issubdtype(ct, xp.float64) or xp.issubdtype(ct, xp.float32): - return _vq.vq(c_obs, c_code_book) + if isdtype(ct, kind='real floating', xp=xp): + c_obs = np.asarray(c_obs) + c_code_book = np.asarray(c_code_book) + result = _vq.vq(c_obs, c_code_book) + return xp.asarray(result) return py_vq(obs, code_book, check_finite=False) @@ -791,6 +794,8 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', # Compute the nearest neighbor for each obs using the current code book label = vq(data, code_book, check_finite=check_finite)[0] # Update the code book by computing centroids + data = np.asarray(data) + label = np.asarray(label) new_code_book, has_members = _vq.update_cluster_means(data, label, nc) if not has_members.all(): miss_meth() @@ -798,4 +803,4 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', new_code_book[~has_members] = code_book[~has_members] code_book = new_code_book - return code_book, label + return xp.asarray(code_book), xp.asarray(label) From d8f66ddf2551080a627ec2b35ed3bbf4bf7a5e2a Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 12:51:46 +0200 Subject: [PATCH 44/87] MAINT: some array conversion --- scipy/cluster/vq.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 2210a1e9731b..c87f864c80b4 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -210,7 +210,7 @@ def vq(obs, code_book, check_finite=True): c_obs = np.asarray(c_obs) c_code_book = np.asarray(c_code_book) result = _vq.vq(c_obs, c_code_book) - return xp.asarray(result) + return xp.asarray(result[0]), xp.asarray(result[1]) return py_vq(obs, code_book, check_finite=False) @@ -309,8 +309,14 @@ 
def _kmeans(obs, guess, thresh=1e-5, xp=None): obs_code, distort = vq(obs, code_book, check_finite=False) prev_avg_dists.append(distort.mean(axis=-1)) # recalc code_book as centroids of associated obs + obs = np.asarray(obs) + obs_code = np.asarray(obs_code) code_book, has_members = _vq.update_cluster_means(obs, obs_code, code_book.shape[0]) + obs = xp.asarray(obs) + obs_code = xp.asarray(obs_code) + code_book = xp.asarray(code_book) + has_members = xp.asarray(has_members) code_book = code_book[has_members] diff = xp.absolute(prev_avg_dists[0] - prev_avg_dists[1]) From 488e3c3388f66f5a7efc16dc8095ec4f10464bfb Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 13:45:26 +0200 Subject: [PATCH 45/87] TST: add more coverage for vq --- scipy/cluster/tests/test_vq.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 9b3e29a0b465..1f5ae223b586 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -128,7 +128,7 @@ def test_py_vq(self, xp): assert_array_equal(label1, LABEL1) @skip_if_array_api - def test_vq(self, xp): + def test_vq(self): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) for tp in [np.asarray, matrix]: label1, dist = _vq.vq(tp(X), tp(initc)) @@ -302,11 +302,12 @@ def test_kmeans_0k(self): assert_raises(ValueError, kmeans2, X, 0) assert_raises(ValueError, kmeans2, X, np.array([])) - def test_kmeans_large_thres(self): + @array_api_compatible + def test_kmeans_large_thres(self, xp): # Regression test for gh-1774 - x = np.array([1, 2, 3, 4, 10], dtype=float) - res = kmeans(x, np.asarray(1), thresh=1e16) - assert_allclose(res[0], np.array([4.])) + x = xp.asarray([1, 2, 3, 4, 10], dtype=float) + res = kmeans(x, xp.asarray(1), thresh=1e16) + assert_allclose(res[0], xp.asarray([4.])) assert_allclose(res[1], 2.3999999999999999) def test_kmeans2_kpp_low_dim(self): @@ -317,7 +318,8 @@ def 
test_kmeans2_kpp_low_dim(self): res, _ = kmeans2(TESTDATA_2D, np.asarray(2), minit='++') assert_allclose(res, prev_res) - def test_kmeans2_kpp_high_dim(self): + @array_api_compatible + def test_kmeans2_kpp_high_dim(self, xp): # Regression test for gh-11462 n_dim = 100 size = 10 @@ -328,14 +330,17 @@ def test_kmeans2_kpp_high_dim(self): np.random.multivariate_normal(centers[0], np.eye(n_dim), size=size), np.random.multivariate_normal(centers[1], np.eye(n_dim), size=size) ]) - res, _ = kmeans2(data, np.asarray(2), minit='++') + + data = xp.asarray(data) + res, _ = kmeans2(data, xp.asarray(2), minit='++') assert_array_almost_equal(res, centers, decimal=0) - def test_kmeans_diff_convergence(self): + @array_api_compatible + def test_kmeans_diff_convergence(self, xp): # Regression test for gh-8727 - obs = np.array([-3, -1, 0, 1, 1, 8], float) - res = kmeans(obs, np.array([-3., 0.99])) - assert_allclose(res[0], np.array([-0.4, 8.])) + obs = xp.asarray([-3, -1, 0, 1, 1, 8], dtype=xp.float64) + res = kmeans(obs, xp.asarray([-3., 0.99])) + assert_allclose(res[0], xp.asarray([-0.4, 8.])) assert_allclose(res[1], 1.0666666666666667) @skip_if_array_api From 77feb2fb2167a6cd9e14565df0f4b75973d64061 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 15 May 2023 13:45:54 +0200 Subject: [PATCH 46/87] BUG: some fix for _convert_to_double --- scipy/cluster/hierarchy.py | 18 +++++++++--------- scipy/cluster/tests/test_hierarchy.py | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index f8bd82e836fe..e6dd9d181bdf 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1022,7 +1022,7 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): raise ValueError(f"Invalid method: {method}") xp = array_namespace(y) - y = _convert_to_double(xp.asarray(y, order='c')) + y = _convert_to_double(as_xparray(y, order='c', xp=xp), xp=xp) if y.ndim == 1: 
distance.is_valid_y(y, throw=True, name='y') @@ -1523,7 +1523,7 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') - y = _convert_to_double(as_xparray(y, order='c', xp=xp)) + y = _convert_to_double(as_xparray(y, order='c', xp=xp), xp=xp) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -1545,17 +1545,17 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): return _optimal_leaf_ordering.optimal_leaf_ordering(Z, y) -def _convert_to_bool(X): - if X.dtype != bool: - X = X.astype(bool) +def _convert_to_bool(X, xp): + if X.dtype != xp.bool: + X = xp.astype(X, bool) if not X.flags.contiguous: X = X.copy() return X -def _convert_to_double(X): - if X.dtype != np.double: - X = X.astype('float64') +def _convert_to_double(X, xp): + if X.dtype != xp.float64: + X = xp.astype(X, xp.float64) if not X.flags.contiguous: X = X.copy() return X @@ -1677,7 +1677,7 @@ def cophenet(Z, Y=None): zz = np.zeros((n * (n-1)) // 2, dtype=np.double) # Since the C code does not support striding using strides. # The dimensions are used instead. - Z = _convert_to_double(Z) + Z = _convert_to_double(Z, xp=xp) Z = np.asarray(Z) _hierarchy.cophenetic_distances(Z, zz, int(n)) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index f49951975fb1..b46ef10ca9a2 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -47,7 +47,7 @@ _order_cluster_tree, _hierarchy, _LINKAGE_METHODS) from scipy.spatial.distance import pdist from scipy.cluster._hierarchy import Heap -from scipy.conftest import skip_if_array_api +from scipy.conftest import skip_if_array_api, array_api_compatible from . 
import hierarchy_test_data @@ -66,11 +66,12 @@ class TestLinkage: - def test_linkage_non_finite_elements_in_distance_matrix(self): + @array_api_compatible + def test_linkage_non_finite_elements_in_distance_matrix(self, xp): # Tests linkage(Y) where Y contains a non-finite element (e.g. NaN or Inf). # Exception expected. - y = np.zeros((6,)) - y[0] = np.nan + y = xp.zeros((6,)) + y[0] = xp.nan assert_raises(ValueError, linkage, y) def test_linkage_empty_distance_matrix(self): @@ -78,13 +79,14 @@ def test_linkage_empty_distance_matrix(self): y = np.zeros((0,)) assert_raises(ValueError, linkage, y) - def test_linkage_tdist(self): + @array_api_compatible + def test_linkage_tdist(self, xp): for method in ['single', 'complete', 'average', 'weighted']: - self.check_linkage_tdist(method) + self.check_linkage_tdist(method, xp) - def check_linkage_tdist(self, method): + def check_linkage_tdist(self, method, xp): # Tests linkage(Y, method) on the tdist data set. - Z = linkage(hierarchy_test_data.ytdist, method) + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), method) expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method) assert_allclose(Z, expectedZ, atol=1e-10) From d2d2e8d725a5dc5cc2848b887ff58aaf8ff8f8a9 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 23 May 2023 15:08:33 -0700 Subject: [PATCH 47/87] FIX: std error --- scipy/cluster/vq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index c87f864c80b4..2dc97729f38c 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -129,8 +129,9 @@ def whiten(obs, check_finite=True): [ 1.75976538, 0.7038557 , 7.21248917]]) """ - obs = as_xparray(obs, check_finite=check_finite) - std_dev = obs.std(axis=0) + xp = array_namespace(obs) + obs = as_xparray(obs, check_finite=check_finite, xp=xp) + std_dev = xp.std(obs, axis=0) zero_std_mask = std_dev == 0 if zero_std_mask.any(): std_dev[zero_std_mask] = 1.0 From 
e395445d7217183ffe2c3e91e4a14fc8491a9c4f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 23 May 2023 16:11:19 -0700 Subject: [PATCH 48/87] MAINT: return non vanilla np but array API --- scipy/_lib/_array_api.py | 2 +- scipy/conftest.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index dd0c5d6d1eaa..680bc7ccc269 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -82,7 +82,7 @@ def array_namespace(*arrays): """ if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: # here we could wrap the namespace if needed - return np + return array_api_compat.numpy arrays = [array for array in arrays if array is not None] diff --git a/scipy/conftest.py b/scipy/conftest.py index 3d2601f5ef18..59e213808f67 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -100,9 +100,11 @@ def check_fpu_mode(request): # Array API backend handling -array_api_backends = (np,) +array_api_backends = [np] if SCIPY_ARRAY_API: + array_api_backends = [np, numpy.array_api] + array_api_available_backends = { 'numpy': np, 'numpy.array_api': numpy.array_api, } @@ -110,11 +112,10 @@ def check_fpu_mode(request): try: import torch array_api_available_backends.update({'pytorch': torch}) + array_api_backends.append(torch) except ImportError: pass - array_api_backends = (np, numpy.array_api) - if SCIPY_ARRAY_API.lower() != "true": SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) From 1146129db70cb1fa23644c37f8d4b5b066a6a2c5 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 23 May 2023 16:18:07 -0700 Subject: [PATCH 49/87] BUG: fix benchmark using float128 --- benchmarks/benchmarks/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/cluster.py b/benchmarks/benchmarks/cluster.py index 1d33608c1866..a7dc73564aff 100644 --- a/benchmarks/benchmarks/cluster.py +++ b/benchmarks/benchmarks/cluster.py @@ -50,7 +50,7 @@ def time_kmeans2(self, k, init): class VQ(Benchmark): - 
params = [[2, 10, 50], ['float32', 'float64', 'float128']] + params = [[2, 10, 50], ['float32', 'float64']] param_names = ['k', 'dtype'] def __init__(self): From bc6c61bf566295e5731fdf2312eb7db5dc46eeb0 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 23 May 2023 16:19:44 -0700 Subject: [PATCH 50/87] MAINT: mitigate dtype conversion --- scipy/cluster/hierarchy.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index e6dd9d181bdf..11e6fca47c02 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,7 +134,7 @@ import numpy as np from . import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance -from scipy._lib._array_api import array_namespace, as_xparray +from scipy._lib._array_api import array_namespace, as_xparray, isdtype from scipy._lib._disjoint_set import DisjointSet @@ -164,12 +164,7 @@ def _copy_array_if_base_present(a): """ Copy the array if its base points to a parent array. 
""" - if a.base is not None: - return a.copy() - elif np.issubsctype(a, np.float32): - return a.astype('float64') - else: - return a + return a def _copy_arrays_if_base_present(T): @@ -1548,16 +1543,14 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): def _convert_to_bool(X, xp): if X.dtype != xp.bool: X = xp.astype(X, bool) - if not X.flags.contiguous: - X = X.copy() + X = as_xparray(X, copy=True, xp=xp) return X def _convert_to_double(X, xp): if X.dtype != xp.float64: X = xp.astype(X, xp.float64) - if not X.flags.contiguous: - X = X.copy() + X = as_xparray(X, copy=True, xp=xp) return X From 61d58601d3d6300c4c2ac84b5beafc0701b9bf42 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 23 May 2023 16:53:28 -0700 Subject: [PATCH 51/87] TST: fix array API name --- scipy/_lib/_array_api.py | 2 +- scipy/_lib/tests/test_array_api.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 680bc7ccc269..dc8a7a2ce27f 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -41,7 +41,7 @@ def compliance_scipy(*arrays): elif not array_api_compat.is_array_api_obj(array): raise TypeError("Only support Array API compatible arrays") elif array.dtype is np.dtype('O'): - raise ValueError('object arrays are not supported') + raise TypeError('object arrays are not supported') def _check_finite(array, xp): diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index f27c9c6cb0e2..84d77c3328ac 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -23,7 +23,7 @@ def test_array_namespace(): _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False xp = array_namespace(x, y) - assert xp.__name__ == 'numpy' + assert xp.__name__ == 'array_api_compat.numpy' _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True @@ -46,7 +46,7 @@ def test_as_xparray_namespace(): _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False x, y, xp_ = as_xparray_namespace(x, y) - assert 
xp_.__name__ == 'numpy' + assert xp_.__name__ == 'array_api_compat.numpy' _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True From dd2cab5d005d93b93469e8453b83476438a9332e Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 23 May 2023 17:35:34 -0700 Subject: [PATCH 52/87] BUG: fix xp.all --- .github/workflows/array_api.yml | 4 +++- scipy/_lib/_array_api.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index a2feb865384e..87918f482e67 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -82,4 +82,6 @@ jobs: run: | export OMP_NUM_THREADS=2 export SCIPY_USE_PROPACK=1 - python dev.py --no-build test --array-api-backend pytorch -s cluster -- --durations 10 --timeout=60 + # remove -s cluster for the real PR probably + python dev.py --no-build test --array-api-backend pytorch --array-api-backend numpy -s cluster -- --durations 10 --timeout=60 + python dev.py --no-build test --array-api-backend all --tests scipy/_lib/tests/test_array_api.py diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index dc8a7a2ce27f..4198a924b04a 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -48,7 +48,7 @@ def _check_finite(array, xp): """Check for NaNs or Infs.""" msg = "array must not contain infs or NaNs" try: - if not xp.isfinite(array).all(): + if not xp.all(xp.isfinite(array)): raise ValueError(msg) except TypeError: raise ValueError(msg) From 04698871cdaafcfd624443208836a4619571d040 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 1 Jun 2023 13:34:21 +0200 Subject: [PATCH 53/87] ENH: add cupy support --- scipy/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scipy/conftest.py b/scipy/conftest.py index 59e213808f67..0776bb77a4c3 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -116,6 +116,13 @@ def check_fpu_mode(request): except ImportError: pass + try: + import cupy + 
array_api_available_backends.update({'cupy': cupy}) + array_api_backends.append(cupy) + except ImportError: + pass + if SCIPY_ARRAY_API.lower() != "true": SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) From 276d3f5111d3845c6a476749d2588583ccfdf588 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 1 Jun 2023 13:52:06 +0200 Subject: [PATCH 54/87] ENH: add support for PyTorch mps mode --- scipy/_lib/_array_api.py | 6 +++++- scipy/conftest.py | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 4198a924b04a..c9e323a3913c 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -21,8 +21,12 @@ # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn array_api_dispatch = os.environ.get("array_api_dispatch", False) SCIPY_ARRAY_API = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) +SCIPY_TORCH_DEVICE = os.environ.get("SCIPY_TORCH_DEVICE", "cpu") -_GLOBAL_CONFIG = {"SCIPY_ARRAY_API": SCIPY_ARRAY_API} +_GLOBAL_CONFIG = { + "SCIPY_ARRAY_API": SCIPY_ARRAY_API, + "SCIPY_TORCH_DEVICE": SCIPY_TORCH_DEVICE, +} def compliance_scipy(*arrays): diff --git a/scipy/conftest.py b/scipy/conftest.py index 0776bb77a4c3..d6f433db086e 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -11,7 +11,7 @@ from scipy._lib._fpumode import get_fpu_mode from scipy._lib._testutils import FPUModeChangeWarning from scipy._lib import _pep440 -from scipy._lib._array_api import SCIPY_ARRAY_API +from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_TORCH_DEVICE def pytest_configure(config): @@ -113,6 +113,7 @@ def check_fpu_mode(request): import torch array_api_available_backends.update({'pytorch': torch}) array_api_backends.append(torch) + torch.set_default_device(SCIPY_TORCH_DEVICE) except ImportError: pass From 06e59f3aa73f786ff46fc0c562d26d6ddb996390 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 1 Jun 2023 14:04:19 +0200 Subject: [PATCH 55/87] MAINT: simplify backend selection logic --- 
scipy/conftest.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/scipy/conftest.py b/scipy/conftest.py index d6f433db086e..ec7037b7931d 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -100,46 +100,44 @@ def check_fpu_mode(request): # Array API backend handling -array_api_backends = [np] +xp_available_backends = {'numpy': np} if SCIPY_ARRAY_API: - array_api_backends = [np, numpy.array_api] - - array_api_available_backends = { - 'numpy': np, 'numpy.array_api': numpy.array_api, - } + # fill the dict of backends with available libraries + xp_available_backends.update({'numpy.array_api': numpy.array_api}) try: import torch - array_api_available_backends.update({'pytorch': torch}) - array_api_backends.append(torch) + xp_available_backends.update({'pytorch': torch}) + # can use `mps` or `cpu` torch.set_default_device(SCIPY_TORCH_DEVICE) except ImportError: pass try: import cupy - array_api_available_backends.update({'cupy': cupy}) - array_api_backends.append(cupy) + xp_available_backends.update({'cupy': cupy}) except ImportError: pass + # by default, use all available backends if SCIPY_ARRAY_API.lower() != "true": SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) if 'all' in SCIPY_ARRAY_API: - array_api_backends = array_api_available_backends.values() + pass # same as True else: + # only select a subset of backends by filtering the dict + # (key each entry by the requested backend name) try: - array_api_backends = [ - array_api_available_backends[backend] + xp_available_backends = { + backend: xp_available_backends[backend] for backend in SCIPY_ARRAY_API - ] + } except KeyError: - msg = f"'--array-api-backend' must be in {array_api_available_backends}" + msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}" raise ValueError(msg) -array_api_compatible = pytest.mark.parametrize("xp", array_api_backends) +array_api_compatible = pytest.mark.parametrize("xp", xp_available_backends.values()) skip_if_array_api = pytest.mark.skipif( SCIPY_ARRAY_API,
From 82a8aa3c9e8263fb7cd141287d8ce37e3937fead Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 1 Jun 2023 14:15:32 +0200 Subject: [PATCH 56/87] MAINT/TST: add device and skip logic for non CPU --- scipy/_lib/_array_api.py | 4 ++-- scipy/conftest.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index c9e323a3913c..11e05ce1af5d 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -21,11 +21,11 @@ # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn array_api_dispatch = os.environ.get("array_api_dispatch", False) SCIPY_ARRAY_API = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) -SCIPY_TORCH_DEVICE = os.environ.get("SCIPY_TORCH_DEVICE", "cpu") +SCIPY_DEVICE = os.environ.get("SCIPY_DEVICE", "cpu") _GLOBAL_CONFIG = { "SCIPY_ARRAY_API": SCIPY_ARRAY_API, - "SCIPY_TORCH_DEVICE": SCIPY_TORCH_DEVICE, + "SCIPY_DEVICE": SCIPY_DEVICE, } diff --git a/scipy/conftest.py b/scipy/conftest.py index ec7037b7931d..3740137450c9 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -11,7 +11,7 @@ from scipy._lib._fpumode import get_fpu_mode from scipy._lib._testutils import FPUModeChangeWarning from scipy._lib import _pep440 -from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_TORCH_DEVICE +from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_DEVICE def pytest_configure(config): @@ -110,13 +110,14 @@ def check_fpu_mode(request): import torch xp_available_backends.update({'pytorch': torch}) # can use `mps` or `cpu` - torch.set_default_device(SCIPY_TORCH_DEVICE) + torch.set_default_device(SCIPY_DEVICE) except ImportError: pass try: import cupy xp_available_backends.update({'cupy': cupy}) + SCIPY_DEVICE = 'cuda' except ImportError: pass @@ -143,3 +144,8 @@ def check_fpu_mode(request): SCIPY_ARRAY_API, reason="do not run with Array API on", ) + +skip_if_array_api_gpu = pytest.mark.skipif( + SCIPY_ARRAY_API and SCIPY_DEVICE != 'cpu', + reason="do not run with 
Array API on and not on CPU", +) From de2257f83869cb5eacecbcc6d623ae12d1c906d3 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 1 Jun 2023 14:34:37 +0200 Subject: [PATCH 57/87] MAINT: change device for cupy --- scipy/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scipy/conftest.py b/scipy/conftest.py index 3740137450c9..2c1459e021e8 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -117,7 +117,6 @@ def check_fpu_mode(request): try: import cupy xp_available_backends.update({'cupy': cupy}) - SCIPY_DEVICE = 'cuda' except ImportError: pass @@ -138,6 +137,9 @@ def check_fpu_mode(request): msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}" raise ValueError(msg) +if 'cupy' in xp_available_backends: + SCIPY_DEVICE = 'cuda' + array_api_compatible = pytest.mark.parametrize("xp", xp_available_backends.values()) skip_if_array_api = pytest.mark.skipif( From 85fccda619bd40a02e7aabd2a704c16d08fc782e Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 1 Jun 2023 14:36:53 +0200 Subject: [PATCH 58/87] TST: skip some mps incompatible tests --- scipy/cluster/tests/test_vq.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 1f5ae223b586..b1669d2325e9 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -11,7 +11,9 @@ from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten, ClusterError, _krandinit) from scipy.cluster import _vq -from scipy.conftest import skip_if_array_api, array_api_compatible +from scipy.conftest import ( + skip_if_array_api, skip_if_array_api_gpu, array_api_compatible +) from scipy.sparse._sputils import matrix from scipy._lib._array_api import SCIPY_ARRAY_API @@ -119,6 +121,7 @@ def test_whiten_not_finite(self, xp): class TestVq: + @skip_if_array_api_gpu @array_api_compatible def test_py_vq(self, xp): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) @@ -318,6 
+321,7 @@ def test_kmeans2_kpp_low_dim(self): res, _ = kmeans2(TESTDATA_2D, np.asarray(2), minit='++') assert_allclose(res, prev_res) + @skip_if_array_api_gpu @array_api_compatible def test_kmeans2_kpp_high_dim(self, xp): # Regression test for gh-11462 From 0c90e8d9edf6bc73592fc3dc7a7aa717f7c08ad4 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 5 Jun 2023 11:18:44 +0200 Subject: [PATCH 59/87] TST/MAINT: address some failures with Cupy --- scipy/cluster/tests/test_vq.py | 14 ++++++++++++-- scipy/cluster/vq.py | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index b1669d2325e9..c6ac5a4613c6 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -89,7 +89,11 @@ def test_whiten(self, xp): [0.87545741, 0.00735733], [0.85124403, 0.26499712], [0.45067590, 0.45464607]]) - assert_allclose(whiten(obs), desired, rtol=1e-5) + if "cupy" in xp.__name__: + import cupy as cp + cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) + else: + assert_allclose(whiten(obs), desired, rtol=1e-5) @array_api_compatible def test_whiten_zero_std(self, xp): @@ -103,7 +107,13 @@ def test_whiten_zero_std(self, xp): [0., 1., 0.96785929]]) with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - assert_allclose(whiten(obs), desired, rtol=1e-5) + + if "cupy" in xp.__name__: + import cupy as cp + cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) + else: + assert_allclose(whiten(obs), desired, rtol=1e-5) + assert_equal(len(w), 1) assert_(issubclass(w[-1].category, RuntimeWarning)) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 2dc97729f38c..1b7bbfaaf1cd 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -592,6 +592,12 @@ def _kpp(data, k, rng, xp): """ dims = data.shape[1] if len(data.shape) > 1 else 1 + + # k should be an integer, NOT a NumPy + # scalar array thing... 
+ if not isinstance(k, int): + k = k.item() + init = xp.empty((k, dims)) for i in range(k): From 1f3856f168f2f8110f955b550e058cd77d094c9e Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 12 Jun 2023 23:20:07 +0200 Subject: [PATCH 60/87] TST/MAINT: fix copy and a few conversions --- scipy/_lib/_array_api.py | 2 +- scipy/cluster/hierarchy.py | 38 +- scipy/cluster/tests/test_hierarchy.py | 672 +++++++++++++++----------- 3 files changed, 418 insertions(+), 294 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 11e05ce1af5d..8bd74b392b43 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -15,7 +15,7 @@ from array_api_compat import size import array_api_compat.numpy -__all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] +__all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace', 'isdtype'] # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index 11e6fca47c02..d048d193bf16 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1032,6 +1032,7 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): 'matrix looks suspiciously like an uncondensed ' 'distance matrix') y = distance.pdist(y, metric) + y = xp.asarray(y) else: raise ValueError("`y` must be 1 or 2 dimensional.") @@ -1347,9 +1348,11 @@ def cut_tree(Z, n_clusters=None, height=None): elif height is None and n_clusters is None: # return the full cut tree cols_idx = xp.arange(nobs) elif height is not None: + height = xp.asarray(height) heights = xp.asarray([x.dist for x in nodes]) cols_idx = xp.searchsorted(heights, height) else: + n_clusters = xp.asarray(n_clusters) cols_idx = nobs - xp.searchsorted(xp.arange(nobs), n_clusters) try: @@ -1365,7 +1368,7 @@ def cut_tree(Z, n_clusters=None, height=None): for i, node in enumerate(nodes): idx = node.pre_order() - this_group = last_group.copy() + this_group = 
as_xparray(last_group, copy=True, xp=xp) this_group[idx] = last_group[idx].min() this_group[this_group > last_group[idx].max()] -= 1 if i + 1 in cols_idx: @@ -1530,14 +1533,17 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): 'matrix looks suspiciously like an uncondensed ' 'distance matrix') y = distance.pdist(y, metric) + y = xp.asarray(y) else: raise ValueError("`y` must be 1 or 2 dimensional.") - if not np.all(np.isfinite(y)): + if not xp.all(xp.isfinite(y)): raise ValueError("The condensed distance matrix must contain only " "finite values.") - return _optimal_leaf_ordering.optimal_leaf_ordering(Z, y) + Z = np.asarray(Z) + y = np.asarray(y) + return xp.asarray(_optimal_leaf_ordering.optimal_leaf_ordering(Z, y)) def _convert_to_bool(X, xp): @@ -1763,7 +1769,7 @@ def inconsistent(Z, d=2): n = Zs[0] + 1 R = np.zeros((n - 1, 4), dtype=np.double) - Z = xp.asarray(Z) + Z = np.asarray(Z) _hierarchy.inconsistent(Z, R, int(n), int(d)) R = xp.asarray(R) return R @@ -1845,16 +1851,16 @@ def from_mlab_linkage(Z): # If it's empty, return it. if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): - return Z.copy() + return as_xparray(Z, copy=True, xp=xp) if len(Zs) != 2: raise ValueError("The linkage array must be rectangular.") # If it contains no rows, return it. if Zs[0] == 0: - return Z.copy() + return as_xparray(Z, copy=True, xp=xp) - Zpart = Z.copy() + Zpart = as_xparray(Z, copy=True, xp=xp) if Zpart[:, 0:2].min() != 1.0 and Zpart[:, 0:2].max() != 2 * Zs[0]: raise ValueError('The format of the indices is not 1..N') @@ -1941,13 +1947,14 @@ def to_mlab_linkage(Z): the original linkage matrix has been dropped. 
""" - Z = as_xparray(Z, order='c', dtype=np.double) + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', dtype=xp.float64) Zs = Z.shape if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): - return Z.copy() + return as_xparray(Z, copy=True, xp=xp) is_valid_linkage(Z, throw=True, name='Z') - ZP = Z[:, 0:3].copy() + ZP = as_xparray(Z[:, 0:3], copy=True, xp=xp) ZP[:, 0:2] += 1.0 return ZP @@ -2571,6 +2578,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): [Z] = _copy_arrays_if_base_present([Z]) Z = np.asarray(Z) + monocrit = np.asarray(monocrit) if criterion == 'inconsistent': if R is None: R = inconsistent(Z, depth) @@ -2689,6 +2697,7 @@ def fclusterdata(X, t, criterion='inconsistent', 'array.') Y = distance.pdist(X, metric=metric) + Y = xp.asarray(Y) Z = linkage(Y, method=method) if R is None: R = inconsistent(Z, d=depth) @@ -3742,9 +3751,8 @@ def is_isomorphic(T1, T2): True """ - xp = array_namespace(T1, T2) - T1 = as_xparray(T1, order='c', xp=xp) - T2 = as_xparray(T2, order='c', xp=xp) + T1 = np.asarray(T1, order='c') + T2 = np.asarray(T2, order='c') T1S = T1.shape T2S = T2.shape @@ -4158,7 +4166,7 @@ def leaders(Z, T): xp = array_namespace(Z, T) Z = as_xparray(Z, order='c', xp=xp) T = as_xparray(T, order='c', xp=xp) - if T.dtype != 'i': + if not isdtype(T.dtype, kind='signed integer', xp=xp): raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') if len(T) != Z.shape[0] + 1: @@ -4171,7 +4179,7 @@ def leaders(Z, T): n = Z.shape[0] + 1 [Z, T] = _copy_arrays_if_base_present([Z, T]) Z = np.asarray(Z) - T = np.asarray(T) + T = np.asarray(T, dtype='i') s = _hierarchy.leaders(Z, T, L, M, int(kk), int(n)) if s >= 0: raise ValueError(('T is not a valid assignment vector. 
Error found ' diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index b46ef10ca9a2..9293b1bcd457 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -90,22 +90,24 @@ def check_linkage_tdist(self, method, xp): expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method) assert_allclose(Z, expectedZ, atol=1e-10) - def test_linkage_X(self): + @array_api_compatible + def test_linkage_X(self, xp): for method in ['centroid', 'median', 'ward']: - self.check_linkage_q(method) + self.check_linkage_q(method, xp) - def check_linkage_q(self, method): + def check_linkage_q(self, method, xp): # Tests linkage(Y, method) on the Q data set. - Z = linkage(hierarchy_test_data.X, method) + Z = linkage(xp.asarray(hierarchy_test_data.X), method) expectedZ = getattr(hierarchy_test_data, 'linkage_X_' + method) assert_allclose(Z, expectedZ, atol=1e-06) y = scipy.spatial.distance.pdist(hierarchy_test_data.X, metric="euclidean") - Z = linkage(y, method) + Z = linkage(xp.asarray(y), method) assert_allclose(Z, expectedZ, atol=1e-06) - def test_compare_with_trivial(self): + @array_api_compatible + def test_compare_with_trivial(self, xp): rng = np.random.RandomState(0) n = 20 X = rng.rand(n, 2) @@ -113,11 +115,12 @@ def test_compare_with_trivial(self): for method, code in _LINKAGE_METHODS.items(): Z_trivial = _hierarchy.linkage(d, n, code) - Z = linkage(d, method) + Z = linkage(xp.asarray(d), method) assert_allclose(Z_trivial, Z, rtol=1e-14, atol=1e-15) - def test_optimal_leaf_ordering(self): - Z = linkage(hierarchy_test_data.ytdist, optimal_ordering=True) + @array_api_compatible + def test_optimal_leaf_ordering(self, xp): + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), optimal_ordering=True) expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_single_olo') assert_allclose(Z, expectedZ, atol=1e-10) @@ -140,43 +143,47 @@ class TestLinkageTies: [2, 3, 2.44948974, 3]]), } - def 
test_linkage_ties(self): + @array_api_compatible + def test_linkage_ties(self, xp): for method in ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']: - self.check_linkage_ties(method) + self.check_linkage_ties(method, xp) - def check_linkage_ties(self, method): - X = np.array([[-1, -1], [0, 0], [1, 1]]) + def check_linkage_ties(self, method, xp): + X = xp.asarray([[-1, -1], [0, 0], [1, 1]]) Z = linkage(X, method=method) expectedZ = self._expectations[method] assert_allclose(Z, expectedZ, atol=1e-06) class TestInconsistent: - def test_inconsistent_tdist(self): + @array_api_compatible + def test_inconsistent_tdist(self, xp): for depth in hierarchy_test_data.inconsistent_ytdist: - self.check_inconsistent_tdist(depth) + self.check_inconsistent_tdist(depth, xp) - def check_inconsistent_tdist(self, depth): - Z = hierarchy_test_data.linkage_ytdist_single + def check_inconsistent_tdist(self, depth, xp): + Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single) assert_allclose(inconsistent(Z, depth), hierarchy_test_data.inconsistent_ytdist[depth]) class TestCopheneticDistance: - def test_linkage_cophenet_tdist_Z(self): + @array_api_compatible + def test_linkage_cophenet_tdist_Z(self, xp): # Tests cophenet(Z) on tdist data set. - expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, - 295, 138, 219, 295, 295]) - Z = hierarchy_test_data.linkage_ytdist_single + expectedM = xp.asarray([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, + 295, 138, 219, 295, 295]) + Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single) M = cophenet(Z) assert_allclose(M, expectedM, atol=1e-10) - def test_linkage_cophenet_tdist_Z_Y(self): + @array_api_compatible + def test_linkage_cophenet_tdist_Z_Y(self, xp): # Tests cophenet(Z, Y) on tdist data set. 
- Z = hierarchy_test_data.linkage_ytdist_single - (c, M) = cophenet(Z, hierarchy_test_data.ytdist) - expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, - 295, 138, 219, 295, 295]) + Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single) + (c, M) = cophenet(Z, xp.asarray(hierarchy_test_data.ytdist)) + expectedM = xp.asarray([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, + 295, 138, 219, 295, 295]) expectedc = 0.639931296433393415057366837573 assert_allclose(c, expectedc, atol=1e-10) assert_allclose(M, expectedM, atol=1e-10) @@ -190,87 +197,97 @@ def test_mlab_linkage_conversion_empty(self): assert_equal(from_mlab_linkage([]), X) assert_equal(to_mlab_linkage([]), X) - def test_mlab_linkage_conversion_single_row(self): + @array_api_compatible + def test_mlab_linkage_conversion_single_row(self, xp): # Tests from/to_mlab_linkage on linkage array with single row. - Z = np.asarray([[0., 1., 3., 2.]]) - Zm = np.asarray([[1, 2, 3]]) - assert_equal(from_mlab_linkage(Zm), Z) - assert_equal(to_mlab_linkage(Z), Zm) + Z = xp.asarray([[0., 1., 3., 2.]]) + Zm = xp.asarray([[1, 2, 3]]) + assert_allclose(from_mlab_linkage(Zm), Z) + assert_allclose(to_mlab_linkage(Z), Zm) - def test_mlab_linkage_conversion_multiple_rows(self): + @array_api_compatible + def test_mlab_linkage_conversion_multiple_rows(self, xp): # Tests from/to_mlab_linkage on linkage array with multiple rows. 
- Zm = np.asarray([[3, 6, 138], [4, 5, 219], + Zm = xp.asarray([[3, 6, 138], [4, 5, 219], [1, 8, 255], [2, 9, 268], [7, 10, 295]]) - Z = np.array([[2., 5., 138., 2.], - [3., 4., 219., 2.], - [0., 7., 255., 3.], - [1., 8., 268., 4.], - [6., 9., 295., 6.]], - dtype=np.double) - assert_equal(from_mlab_linkage(Zm), Z) - assert_equal(to_mlab_linkage(Z), Zm) + Z = xp.asarray([[2., 5., 138., 2.], + [3., 4., 219., 2.], + [0., 7., 255., 3.], + [1., 8., 268., 4.], + [6., 9., 295., 6.]], + dtype=xp.float64) + assert_allclose(from_mlab_linkage(Zm), Z) + assert_allclose(to_mlab_linkage(Z), Zm) class TestFcluster: - def test_fclusterdata(self): + @array_api_compatible + def test_fclusterdata(self, xp): for t in hierarchy_test_data.fcluster_inconsistent: - self.check_fclusterdata(t, 'inconsistent') + self.check_fclusterdata(t, 'inconsistent', xp) for t in hierarchy_test_data.fcluster_distance: - self.check_fclusterdata(t, 'distance') + self.check_fclusterdata(t, 'distance', xp) for t in hierarchy_test_data.fcluster_maxclust: - self.check_fclusterdata(t, 'maxclust') + self.check_fclusterdata(t, 'maxclust', xp) - def check_fclusterdata(self, t, criterion): + def check_fclusterdata(self, t, criterion, xp): # Tests fclusterdata(X, criterion=criterion, t=t) on a random 3-cluster data set. 
- expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t] - X = hierarchy_test_data.Q_X + expectedT = xp.asarray(getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]) + X = xp.asarray(hierarchy_test_data.Q_X) + t = xp.asarray(t) T = fclusterdata(X, criterion=criterion, t=t) assert_(is_isomorphic(T, expectedT)) - def test_fcluster(self): + @array_api_compatible + def test_fcluster(self, xp): for t in hierarchy_test_data.fcluster_inconsistent: - self.check_fcluster(t, 'inconsistent') + self.check_fcluster(t, 'inconsistent', xp) for t in hierarchy_test_data.fcluster_distance: - self.check_fcluster(t, 'distance') + self.check_fcluster(t, 'distance', xp) for t in hierarchy_test_data.fcluster_maxclust: - self.check_fcluster(t, 'maxclust') + self.check_fcluster(t, 'maxclust', xp) - def check_fcluster(self, t, criterion): + def check_fcluster(self, t, criterion, xp): # Tests fcluster(Z, criterion=criterion, t=t) on a random 3-cluster data set. - expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t] - Z = single(hierarchy_test_data.Q_X) + expectedT = xp.asarray(getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]) + Z = single(xp.asarray(hierarchy_test_data.Q_X)) + t = xp.asarray(t) T = fcluster(Z, criterion=criterion, t=t) assert_(is_isomorphic(T, expectedT)) - def test_fcluster_monocrit(self): + @array_api_compatible + def test_fcluster_monocrit(self, xp): for t in hierarchy_test_data.fcluster_distance: - self.check_fcluster_monocrit(t) + self.check_fcluster_monocrit(t, xp) for t in hierarchy_test_data.fcluster_maxclust: - self.check_fcluster_maxclust_monocrit(t) + self.check_fcluster_maxclust_monocrit(t, xp) - def check_fcluster_monocrit(self, t): - expectedT = hierarchy_test_data.fcluster_distance[t] - Z = single(hierarchy_test_data.Q_X) + def check_fcluster_monocrit(self, t, xp): + expectedT = xp.asarray(hierarchy_test_data.fcluster_distance[t]) + Z = single(xp.asarray(hierarchy_test_data.Q_X)) T = fcluster(Z, t, 
criterion='monocrit', monocrit=maxdists(Z)) assert_(is_isomorphic(T, expectedT)) - def check_fcluster_maxclust_monocrit(self, t): - expectedT = hierarchy_test_data.fcluster_maxclust[t] - Z = single(hierarchy_test_data.Q_X) + def check_fcluster_maxclust_monocrit(self, t, xp): + expectedT = xp.asarray(hierarchy_test_data.fcluster_maxclust[t]) + Z = single(xp.asarray(hierarchy_test_data.Q_X)) T = fcluster(Z, t, criterion='maxclust_monocrit', monocrit=maxdists(Z)) assert_(is_isomorphic(T, expectedT)) class TestLeaders: - def test_leaders_single(self): + @array_api_compatible + def test_leaders_single(self, xp): # Tests leaders using a flat clustering generated by single linkage. X = hierarchy_test_data.Q_X Y = pdist(X) + Y = xp.asarray(Y) Z = linkage(Y) T = fcluster(Z, criterion='maxclust', t=3) - Lright = (np.array([53, 55, 56]), np.array([2, 3, 1])) + Lright = (xp.asarray([53, 55, 56]), xp.asarray([2, 3, 1])) + T = xp.asarray(T, dtype=xp.int64) L = leaders(Z, T) - assert_equal(L, Lright) + assert_allclose(xp.concatenate(L), xp.concatenate(Lright)) class TestIsIsomorphic: @@ -282,6 +299,7 @@ def test_is_isomorphic_1(self): assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_2(self): # Tests is_isomorphic on test case #2 (two flat clusters, different labelings) a = np.asarray([1, 7, 1]) @@ -289,12 +307,14 @@ def test_is_isomorphic_2(self): assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_3(self): # Tests is_isomorphic on test case #3 (no flat clusters) a = np.asarray([]) b = np.asarray([]) assert_(is_isomorphic(a, b)) + @skip_if_array_api def test_is_isomorphic_4A(self): # Tests is_isomorphic on test case #4A (3 flat clusters, different labelings, isomorphic) a = np.asarray([1, 2, 3]) @@ -302,6 +322,7 @@ def test_is_isomorphic_4A(self): assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_4B(self): # Tests is_isomorphic 
on test case #4B (3 flat clusters, different labelings, nonisomorphic) a = np.asarray([1, 2, 3, 3]) @@ -309,6 +330,7 @@ def test_is_isomorphic_4B(self): assert_(is_isomorphic(a, b) is False) assert_(is_isomorphic(b, a) is False) + @skip_if_array_api def test_is_isomorphic_4C(self): # Tests is_isomorphic on test case #4C (3 flat clusters, different labelings, isomorphic) a = np.asarray([7, 2, 3]) @@ -316,12 +338,14 @@ def test_is_isomorphic_4C(self): assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_5(self): # Tests is_isomorphic on test case #5 (1000 observations, 2/3/5 random # clusters, random permutation of the labeling). for nc in [2, 3, 5]: self.help_is_isomorphic_randperm(1000, nc) + @skip_if_array_api def test_is_isomorphic_6(self): # Tests is_isomorphic on test case #5A (1000 observations, 2/3/5 random # clusters, random permutation of the labeling, slightly @@ -329,6 +353,7 @@ def test_is_isomorphic_6(self): for nc in [2, 3, 5]: self.help_is_isomorphic_randperm(1000, nc, True, 5) + @skip_if_array_api def test_is_isomorphic_7(self): # Regression test for gh-6271 a = np.asarray([1, 2, 3]) @@ -351,76 +376,89 @@ def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0): class TestIsValidLinkage: - def test_is_valid_linkage_various_size(self): + @array_api_compatible + def test_is_valid_linkage_various_size(self, xp): for nrow, ncol, valid in [(2, 5, False), (2, 3, False), (1, 4, True), (2, 4, True)]: - self.check_is_valid_linkage_various_size(nrow, ncol, valid) + self.check_is_valid_linkage_various_size(nrow, ncol, valid, xp) - def check_is_valid_linkage_various_size(self, nrow, ncol, valid): + def check_is_valid_linkage_various_size(self, nrow, ncol, valid, xp): # Tests is_valid_linkage(Z) with linkage matrics of various sizes - Z = np.asarray([[0, 1, 3.0, 2, 5], - [3, 2, 4.0, 3, 3]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2, 5], + [3, 2, 4.0, 3, 3]], dtype=xp.float64) Z = 
Z[:nrow, :ncol] assert_(is_valid_linkage(Z) == valid) if not valid: assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_int_type(self): + @array_api_compatible + def test_is_valid_linkage_int_type(self, xp): # Tests is_valid_linkage(Z) with integer type. - Z = np.asarray([[0, 1, 3.0, 2], + Z = xp.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=int) assert_(is_valid_linkage(Z) is False) assert_raises(TypeError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_empty(self): + @array_api_compatible + def test_is_valid_linkage_empty(self, xp): # Tests is_valid_linkage(Z) with empty linkage. - Z = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_(is_valid_linkage(Z) is True) - def test_is_valid_linkage_4_and_up_neg_index_left(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_index_left(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative indices (left). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,0] = -2 assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up_neg_index_right(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_index_right(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative indices (right). 
for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,1] = -2 assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up_neg_dist(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_dist(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative distances. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,2] = -0.5 assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up_neg_counts(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_counts(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative counts. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,3] = -2 assert_(is_valid_linkage(Z) is False) @@ -428,43 +466,49 @@ def test_is_valid_linkage_4_and_up_neg_counts(self): class TestIsValidInconsistent: - def test_is_valid_im_int_type(self): + @array_api_compatible + def test_is_valid_im_int_type(self, xp): # Tests is_valid_im(R) with integer type. 
- R = np.asarray([[0, 1, 3.0, 2], + R = xp.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=int) assert_(is_valid_im(R) is False) assert_raises(TypeError, is_valid_im, R, throw=True) - def test_is_valid_im_various_size(self): + @array_api_compatible + def test_is_valid_im_various_size(self, xp): for nrow, ncol, valid in [(2, 5, False), (2, 3, False), (1, 4, True), (2, 4, True)]: - self.check_is_valid_im_various_size(nrow, ncol, valid) + self.check_is_valid_im_various_size(nrow, ncol, valid, xp) - def check_is_valid_im_various_size(self, nrow, ncol, valid): + def check_is_valid_im_various_size(self, nrow, ncol, valid, xp): # Tests is_valid_im(R) with linkage matrics of various sizes - R = np.asarray([[0, 1, 3.0, 2, 5], - [3, 2, 4.0, 3, 3]], dtype=np.double) + R = xp.asarray([[0, 1, 3.0, 2, 5], + [3, 2, 4.0, 3, 3]], dtype=xp.float64) R = R[:nrow, :ncol] assert_(is_valid_im(R) == valid) if not valid: assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_empty(self): + @array_api_compatible + def test_is_valid_im_empty(self, xp): # Tests is_valid_im(R) with empty inconsistency matrix. - R = np.zeros((0, 4), dtype=np.double) + R = xp.zeros((0, 4), dtype=xp.float64) assert_(is_valid_im(R) is False) assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_4_and_up(self): + @array_api_compatible + def test_is_valid_im_4_and_up(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) assert_(is_valid_im(R) is True) - def test_is_valid_im_4_and_up_neg_index_left(self): + @array_api_compatible + def test_is_valid_im_4_and_up_neg_index_left(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link height means. 
for i in range(4, 15, 3): @@ -475,22 +519,26 @@ def test_is_valid_im_4_and_up_neg_index_left(self): assert_(is_valid_im(R) is False) assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_4_and_up_neg_index_right(self): + @array_api_compatible + def test_is_valid_im_4_and_up_neg_index_right(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link height standard deviations. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) R[i//2,1] = -2.0 assert_(is_valid_im(R) is False) assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_4_and_up_neg_dist(self): + @array_api_compatible + def test_is_valid_im_4_and_up_neg_dist(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link counts. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) R[i//2,2] = -0.5 @@ -499,307 +547,353 @@ def test_is_valid_im_4_and_up_neg_dist(self): class TestNumObsLinkage: - def test_num_obs_linkage_empty(self): + @array_api_compatible + def test_num_obs_linkage_empty(self, xp): # Tests num_obs_linkage(Z) with empty linkage. - Z = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, num_obs_linkage, Z) - def test_num_obs_linkage_1x4(self): + @array_api_compatible + def test_num_obs_linkage_1x4(self, xp): # Tests num_obs_linkage(Z) on linkage over 2 observations. - Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64) assert_equal(num_obs_linkage(Z), 2) - def test_num_obs_linkage_2x4(self): + @array_api_compatible + def test_num_obs_linkage_2x4(self, xp): # Tests num_obs_linkage(Z) on linkage over 3 observations. 
- Z = np.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2], + [3, 2, 4.0, 3]], dtype=xp.float64) assert_equal(num_obs_linkage(Z), 3) - def test_num_obs_linkage_4_and_up(self): + @array_api_compatible + def test_num_obs_linkage_4_and_up(self, xp): # Tests num_obs_linkage(Z) on linkage on observation sets between sizes # 4 and 15 (step size 3). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_equal(num_obs_linkage(Z), i) class TestLeavesList: - def test_leaves_list_1x4(self): + @array_api_compatible + def test_leaves_list_1x4(self, xp): # Tests leaves_list(Z) on a 1x4 linkage. - Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64) to_tree(Z) - assert_equal(leaves_list(Z), [0, 1]) + assert_allclose(leaves_list(Z), [0, 1]) - def test_leaves_list_2x4(self): + @array_api_compatible + def test_leaves_list_2x4(self, xp): # Tests leaves_list(Z) on a 2x4 linkage. 
- Z = np.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2], + [3, 2, 4.0, 3]], dtype=xp.float64) to_tree(Z) - assert_equal(leaves_list(Z), [0, 1, 2]) + assert_allclose(leaves_list(Z), [0, 1, 2]) - def test_leaves_list_Q(self): + @array_api_compatible + def test_leaves_list_Q(self, xp): for method in ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']: - self.check_leaves_list_Q(method) + self.check_leaves_list_Q(method, xp) - def check_leaves_list_Q(self, method): + def check_leaves_list_Q(self, method, xp): # Tests leaves_list(Z) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) node = to_tree(Z) - assert_equal(node.pre_order(), leaves_list(Z)) + assert_allclose(node.pre_order(), leaves_list(Z)) - def test_Q_subtree_pre_order(self): + @array_api_compatible + def test_Q_subtree_pre_order(self, xp): # Tests that pre_order() works when called on sub-trees. - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, 'single') node = to_tree(Z) - assert_equal(node.pre_order(), (node.get_left().pre_order() + assert_allclose(node.pre_order(), (node.get_left().pre_order() + node.get_right().pre_order())) class TestCorrespond: - def test_correspond_empty(self): + @array_api_compatible + def test_correspond_empty(self, xp): # Tests correspond(Z, y) with empty linkage and condensed distance matrix. - y = np.zeros((0,)) - Z = np.zeros((0,4)) + y = xp.zeros((0,), dtype=xp.float64) + Z = xp.zeros((0,4), dtype=xp.float64) assert_raises(ValueError, correspond, Z, y) - def test_correspond_2_and_up(self): + @array_api_compatible + def test_correspond_2_and_up(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. 
for i in range(2, 4): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_(correspond(Z, y)) for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_(correspond(Z, y)) - def test_correspond_4_and_up(self): + @array_api_compatible + def test_correspond_4_and_up(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. Correspondence should be false. for (i, j) in (list(zip(list(range(2, 4)), list(range(3, 5)))) + list(zip(list(range(3, 5)), list(range(2, 4))))): y = np.random.rand(i*(i-1)//2) y2 = np.random.rand(j*(j-1)//2) + y = xp.asarray(y) + y2 = xp.asarray(y2) Z = linkage(y) Z2 = linkage(y2) assert_equal(correspond(Z, y2), False) assert_equal(correspond(Z2, y), False) - def test_correspond_4_and_up_2(self): + @array_api_compatible + def test_correspond_4_and_up_2(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. Correspondence should be false. for (i, j) in (list(zip(list(range(2, 7)), list(range(16, 21)))) + list(zip(list(range(2, 7)), list(range(16, 21))))): y = np.random.rand(i*(i-1)//2) y2 = np.random.rand(j*(j-1)//2) + y = xp.asarray(y) + y2 = xp.asarray(y2) Z = linkage(y) Z2 = linkage(y2) assert_equal(correspond(Z, y2), False) assert_equal(correspond(Z2, y), False) - def test_num_obs_linkage_multi_matrix(self): + @array_api_compatible + def test_num_obs_linkage_multi_matrix(self, xp): # Tests num_obs_linkage with observation matrices of multiple sizes. for n in range(2, 10): X = np.random.rand(n, 4) Y = pdist(X) + Y = xp.asarray(Y) Z = linkage(Y) assert_equal(num_obs_linkage(Z), n) class TestIsMonotonic: - def test_is_monotonic_empty(self): + @array_api_compatible + def test_is_monotonic_empty(self, xp): # Tests is_monotonic(Z) on an empty linkage. 
- Z = np.zeros((0, 4)) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, is_monotonic, Z) - def test_is_monotonic_1x4(self): + @array_api_compatible + def test_is_monotonic_1x4(self, xp): # Tests is_monotonic(Z) on 1x4 linkage. Expecting True. - Z = np.asarray([[0, 1, 0.3, 2]], dtype=np.double) - assert_equal(is_monotonic(Z), True) + Z = xp.asarray([[0, 1, 0.3, 2]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_2x4_T(self): + @array_api_compatible + def test_is_monotonic_2x4_T(self, xp): # Tests is_monotonic(Z) on 2x4 linkage. Expecting True. - Z = np.asarray([[0, 1, 0.3, 2], - [2, 3, 0.4, 3]], dtype=np.double) - assert_equal(is_monotonic(Z), True) + Z = xp.asarray([[0, 1, 0.3, 2], + [2, 3, 0.4, 3]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_2x4_F(self): + @array_api_compatible + def test_is_monotonic_2x4_F(self, xp): # Tests is_monotonic(Z) on 2x4 linkage. Expecting False. - Z = np.asarray([[0, 1, 0.4, 2], - [2, 3, 0.3, 3]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + Z = xp.asarray([[0, 1, 0.4, 2], + [2, 3, 0.3, 3]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_3x4_T(self): + @array_api_compatible + def test_is_monotonic_3x4_T(self, xp): # Tests is_monotonic(Z) on 3x4 linkage. Expecting True. - Z = np.asarray([[0, 1, 0.3, 2], + Z = xp.asarray([[0, 1, 0.3, 2], [2, 3, 0.4, 2], - [4, 5, 0.6, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), True) + [4, 5, 0.6, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_3x4_F1(self): + @array_api_compatible + def test_is_monotonic_3x4_F1(self, xp): # Tests is_monotonic(Z) on 3x4 linkage (case 1). Expecting False. 
- Z = np.asarray([[0, 1, 0.3, 2], + Z = xp.asarray([[0, 1, 0.3, 2], [2, 3, 0.2, 2], - [4, 5, 0.6, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + [4, 5, 0.6, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_3x4_F2(self): + @array_api_compatible + def test_is_monotonic_3x4_F2(self, xp): # Tests is_monotonic(Z) on 3x4 linkage (case 2). Expecting False. - Z = np.asarray([[0, 1, 0.8, 2], + Z = xp.asarray([[0, 1, 0.8, 2], [2, 3, 0.4, 2], - [4, 5, 0.6, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + [4, 5, 0.6, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_3x4_F3(self): + @array_api_compatible + def test_is_monotonic_3x4_F3(self, xp): # Tests is_monotonic(Z) on 3x4 linkage (case 3). Expecting False - Z = np.asarray([[0, 1, 0.3, 2], + Z = xp.asarray([[0, 1, 0.3, 2], [2, 3, 0.4, 2], - [4, 5, 0.2, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + [4, 5, 0.2, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_tdist_linkage1(self): + @array_api_compatible + def test_is_monotonic_tdist_linkage1(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # tdist data set. Expecting True. - Z = linkage(hierarchy_test_data.ytdist, 'single') - assert_equal(is_monotonic(Z), True) + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_tdist_linkage2(self): + @array_api_compatible + def test_is_monotonic_tdist_linkage2(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # tdist data set. Perturbing. Expecting False. 
- Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') Z[2,2] = 0.0 - assert_equal(is_monotonic(Z), False) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_Q_linkage(self): + @array_api_compatible + def test_is_monotonic_Q_linkage(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # Q data set. Expecting True. - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, 'single') - assert_equal(is_monotonic(Z), True) + assert_allclose(is_monotonic(Z), True) class TestMaxDists: - def test_maxdists_empty_linkage(self): + @array_api_compatible + def test_maxdists_empty_linkage(self, xp): # Tests maxdists(Z) on empty linkage. Expecting exception. - Z = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxdists, Z) - def test_maxdists_one_cluster_linkage(self): + @array_api_compatible + def test_maxdists_one_cluster_linkage(self, xp): # Tests maxdists(Z) on linkage with one cluster. 
- Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) MD = maxdists(Z) - expectedMD = calculate_maximum_distances(Z) + expectedMD = calculate_maximum_distances(Z, xp) assert_allclose(MD, expectedMD, atol=1e-15) - def test_maxdists_Q_linkage(self): + @array_api_compatible + def test_maxdists_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: - self.check_maxdists_Q_linkage(method) + self.check_maxdists_Q_linkage(method, xp) - def check_maxdists_Q_linkage(self, method): + def check_maxdists_Q_linkage(self, method, xp): # Tests maxdists(Z) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) MD = maxdists(Z) - expectedMD = calculate_maximum_distances(Z) + expectedMD = calculate_maximum_distances(Z, xp) assert_allclose(MD, expectedMD, atol=1e-15) class TestMaxInconsts: - def test_maxinconsts_empty_linkage(self): + @array_api_compatible + def test_maxinconsts_empty_linkage(self, xp): # Tests maxinconsts(Z, R) on empty linkage. Expecting exception. - Z = np.zeros((0, 4), dtype=np.double) - R = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) + R = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxinconsts, Z, R) - def test_maxinconsts_difrow_linkage(self): + @array_api_compatible + def test_maxinconsts_difrow_linkage(self, xp): # Tests maxinconsts(Z, R) on linkage and inconsistency matrices with # different numbers of clusters. Expecting exception. - Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) R = np.random.rand(2, 4) + R = xp.asarray(R) assert_raises(ValueError, maxinconsts, Z, R) - def test_maxinconsts_one_cluster_linkage(self): + @array_api_compatible + def test_maxinconsts_one_cluster_linkage(self, xp): # Tests maxinconsts(Z, R) on linkage with one cluster. 
- Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) - R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) + R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64) MD = maxinconsts(Z, R) - expectedMD = calculate_maximum_inconsistencies(Z, R) + expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp) assert_allclose(MD, expectedMD, atol=1e-15) - def test_maxinconsts_Q_linkage(self): + @array_api_compatible + def test_maxinconsts_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: - self.check_maxinconsts_Q_linkage(method) + self.check_maxinconsts_Q_linkage(method, xp) - def check_maxinconsts_Q_linkage(self, method): + def check_maxinconsts_Q_linkage(self, method, xp): # Tests maxinconsts(Z, R) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) R = inconsistent(Z) MD = maxinconsts(Z, R) - expectedMD = calculate_maximum_inconsistencies(Z, R) + expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp) assert_allclose(MD, expectedMD, atol=1e-15) class TestMaxRStat: - def test_maxRstat_invalid_index(self): + @array_api_compatible + def test_maxRstat_invalid_index(self, xp): for i in [3.3, -1, 4]: - self.check_maxRstat_invalid_index(i) + self.check_maxRstat_invalid_index(i, xp) - def check_maxRstat_invalid_index(self, i): + def check_maxRstat_invalid_index(self, i, xp): # Tests maxRstat(Z, R, i). Expecting exception. 
- Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) - R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) + R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64) if isinstance(i, int): assert_raises(ValueError, maxRstat, Z, R, i) else: assert_raises(TypeError, maxRstat, Z, R, i) - def test_maxRstat_empty_linkage(self): + @array_api_compatible + def test_maxRstat_empty_linkage(self, xp): for i in range(4): - self.check_maxRstat_empty_linkage(i) + self.check_maxRstat_empty_linkage(i, xp) - def check_maxRstat_empty_linkage(self, i): + def check_maxRstat_empty_linkage(self, i, xp): # Tests maxRstat(Z, R, i) on empty linkage. Expecting exception. - Z = np.zeros((0, 4), dtype=np.double) - R = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) + R = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxRstat, Z, R, i) - def test_maxRstat_difrow_linkage(self): + @array_api_compatible + def test_maxRstat_difrow_linkage(self, xp): for i in range(4): - self.check_maxRstat_difrow_linkage(i) + self.check_maxRstat_difrow_linkage(i, xp) - def check_maxRstat_difrow_linkage(self, i): + def check_maxRstat_difrow_linkage(self, i, xp): # Tests maxRstat(Z, R, i) on linkage and inconsistency matrices with # different numbers of clusters. Expecting exception. - Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) R = np.random.rand(2, 4) + R = xp.asarray(R) assert_raises(ValueError, maxRstat, Z, R, i) - def test_maxRstat_one_cluster_linkage(self): + @array_api_compatible + def test_maxRstat_one_cluster_linkage(self, xp): for i in range(4): - self.check_maxRstat_one_cluster_linkage(i) + self.check_maxRstat_one_cluster_linkage(i, xp) - def check_maxRstat_one_cluster_linkage(self, i): + def check_maxRstat_one_cluster_linkage(self, i, xp): # Tests maxRstat(Z, R, i) on linkage with one cluster. 
- Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) - R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) + R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64) MD = maxRstat(Z, R, 1) - expectedMD = calculate_maximum_inconsistencies(Z, R, 1) + expectedMD = calculate_maximum_inconsistencies(Z, R, 1, xp) assert_allclose(MD, expectedMD, atol=1e-15) - def test_maxRstat_Q_linkage(self): + @array_api_compatible + def test_maxRstat_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: for i in range(4): - self.check_maxRstat_Q_linkage(method, i) + self.check_maxRstat_Q_linkage(method, i, xp) - def check_maxRstat_Q_linkage(self, method, i): + def check_maxRstat_Q_linkage(self, method, i, xp): # Tests maxRstat(Z, R, i) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) R = inconsistent(Z) MD = maxRstat(Z, R, 1) @@ -808,28 +902,32 @@ def check_maxRstat_Q_linkage(self, method, i): class TestDendrogram: - def test_dendrogram_single_linkage_tdist(self): + @array_api_compatible + def test_dendrogram_single_linkage_tdist(self, xp): # Tests dendrogram calculation on single linkage of the tdist data set. 
- Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') R = dendrogram(Z, no_plot=True) leaves = R["leaves"] assert_equal(leaves, [2, 5, 1, 0, 3, 4]) - def test_valid_orientation(self): - Z = linkage(hierarchy_test_data.ytdist, 'single') + @array_api_compatible + def test_valid_orientation(self, xp): + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') assert_raises(ValueError, dendrogram, Z, orientation="foo") - def test_labels_as_array_or_list(self): + @array_api_compatible + def test_labels_as_array_or_list(self, xp): # test for gh-12418 - Z = linkage(hierarchy_test_data.ytdist, 'single') - labels = np.array([1, 3, 2, 6, 4, 5]) + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') + labels = xp.asarray([1, 3, 2, 6, 4, 5]) result1 = dendrogram(Z, labels=labels, no_plot=True) result2 = dendrogram(Z, labels=labels.tolist(), no_plot=True) assert result1 == result2 + @array_api_compatible @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") - def test_valid_label_size(self): - link = np.array([ + def test_valid_label_size(self, xp): + link = xp.asarray([ [0, 1, 1.0, 4], [2, 3, 1.0, 5], [4, 5, 2.0, 6], @@ -847,14 +945,15 @@ def test_valid_label_size(self): plt.close() + @array_api_compatible @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") - def test_dendrogram_plot(self): + def test_dendrogram_plot(self, xp): for orientation in ['top', 'bottom', 'left', 'right']: - self.check_dendrogram_plot(orientation) + self.check_dendrogram_plot(orientation, xp) - def check_dendrogram_plot(self, orientation): + def check_dendrogram_plot(self, orientation, xp): # Tests dendrogram plotting. 
- Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') expected = {'color_list': ['C1', 'C0', 'C0', 'C0', 'C0'], 'dcoord': [[0.0, 138.0, 138.0, 0.0], [0.0, 219.0, 219.0, 0.0], @@ -876,6 +975,7 @@ def check_dendrogram_plot(self, orientation): # test that dendrogram accepts ax keyword R1 = dendrogram(Z, ax=ax, orientation=orientation) + R1['dcoord'] = np.asarray(R1['dcoord']) assert_equal(R1, expected) # test that dendrogram accepts and handle the leaf_font_size and @@ -887,8 +987,8 @@ def check_dendrogram_plot(self, orientation): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_equal(testlabel.get_rotation(), 90) - assert_equal(testlabel.get_size(), 20) + assert_allclose(testlabel.get_rotation(), 90) + assert_allclose(testlabel.get_size(), 20) dendrogram(Z, ax=ax, orientation=orientation, leaf_rotation=90) testlabel = ( @@ -896,7 +996,7 @@ def check_dendrogram_plot(self, orientation): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_equal(testlabel.get_rotation(), 90) + assert_allclose(testlabel.get_rotation(), 90) dendrogram(Z, ax=ax, orientation=orientation, leaf_font_size=20) testlabel = ( @@ -904,20 +1004,23 @@ def check_dendrogram_plot(self, orientation): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_equal(testlabel.get_size(), 20) + assert_allclose(testlabel.get_size(), 20) plt.close() # test plotting to gca (will import pylab) R2 = dendrogram(Z, orientation=orientation) plt.close() + R2['dcoord'] = np.asarray(R2['dcoord']) assert_equal(R2, expected) + @array_api_compatible @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") - def test_dendrogram_truncate_mode(self): - Z = linkage(hierarchy_test_data.ytdist, 'single') + def test_dendrogram_truncate_mode(self, xp): + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') R = dendrogram(Z, 2, 'lastp', show_contracted=True) plt.close() + R['dcoord'] = 
np.asarray(R['dcoord']) assert_equal(R, {'color_list': ['C0'], 'dcoord': [[0.0, 295.0, 295.0, 0.0]], 'icoord': [[5.0, 5.0, 15.0, 15.0]], @@ -928,6 +1031,7 @@ def test_dendrogram_truncate_mode(self): R = dendrogram(Z, 2, 'mtica', show_contracted=True) plt.close() + R['dcoord'] = np.asarray(R['dcoord']) assert_equal(R, {'color_list': ['C1', 'C0', 'C0', 'C0'], 'dcoord': [[0.0, 138.0, 138.0, 0.0], [0.0, 255.0, 255.0, 0.0], @@ -942,9 +1046,10 @@ def test_dendrogram_truncate_mode(self): 'leaves_color_list': ['C1', 'C1', 'C0', 'C0', 'C0'], }) - def test_dendrogram_colors(self): + @array_api_compatible + def test_dendrogram_colors(self, xp): # Tests dendrogram plots with alternate colors - Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') set_link_color_palette(['c', 'm', 'y', 'k']) R = dendrogram(Z, no_plot=True, @@ -957,30 +1062,32 @@ def test_dendrogram_colors(self): # reset color palette (global list) set_link_color_palette(None) - def test_dendrogram_leaf_colors_zero_dist(self): + @array_api_compatible + def test_dendrogram_leaf_colors_zero_dist(self, xp): # tests that the colors of leafs are correct for tree # with two identical points - x = np.array([[1, 0, 0], - [0, 0, 1], - [0, 2, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 0]]) + x = xp.asarray([[1, 0, 0], + [0, 0, 1], + [0, 2, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 0]]) z = linkage(x, "single") d = dendrogram(z, no_plot=True) exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2'] colors = d["leaves_color_list"] assert_equal(colors, exp_colors) - def test_dendrogram_leaf_colors(self): + @array_api_compatible + def test_dendrogram_leaf_colors(self, xp): # tests that the colors are correct for a tree # with two near points ((0, 0, 1.1) and (0, 0, 1)) - x = np.array([[1, 0, 0], - [0, 0, 1.1], - [0, 2, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 0]]) + x = xp.asarray([[1, 0, 0], + [0, 0, 1.1], + [0, 2, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 0]]) z = linkage(x, 
"single") d = dendrogram(z, no_plot=True) exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2'] @@ -988,11 +1095,11 @@ def test_dendrogram_leaf_colors(self): assert_equal(colors, exp_colors) -def calculate_maximum_distances(Z): +def calculate_maximum_distances(Z, xp): # Used for testing correctness of maxdists. n = Z.shape[0] + 1 - B = np.zeros((n-1,)) - q = np.zeros((3,)) + B = xp.zeros((n-1,)) + q = xp.zeros((3,)) for i in range(0, n - 1): q[:] = 0.0 left = Z[i, 0] @@ -1006,11 +1113,11 @@ def calculate_maximum_distances(Z): return B -def calculate_maximum_inconsistencies(Z, R, k=3): +def calculate_maximum_inconsistencies(Z, R, k=3, xp=np): # Used for testing correctness of maxinconsts. n = Z.shape[0] + 1 - B = np.zeros((n-1,)) - q = np.zeros((3,)) + B = xp.zeros((n-1,)) + q = xp.zeros((3,)) for i in range(0, n - 1): q[:] = 0.0 left = Z[i, 0] @@ -1024,26 +1131,31 @@ def calculate_maximum_inconsistencies(Z, R, k=3): return B -def test_unsupported_uncondensed_distance_matrix_linkage_warning(): - assert_warns(ClusterWarning, linkage, np.asarray([[0, 1], [1, 0]])) +@array_api_compatible +def test_unsupported_uncondensed_distance_matrix_linkage_warning(xp): + assert_warns(ClusterWarning, linkage, xp.asarray([[0, 1], [1, 0]])) -def test_euclidean_linkage_value_error(): +@array_api_compatible +def test_euclidean_linkage_value_error(xp): for method in scipy.cluster.hierarchy._EUCLIDEAN_METHODS: - assert_raises(ValueError, linkage, np.asarray([[1, 1], [1, 1]]), + assert_raises(ValueError, linkage, xp.asarray([[1, 1], [1, 1]]), method=method, metric='cityblock') -def test_2x2_linkage(): - Z1 = linkage(np.asarray([1]), method='single', metric='euclidean') - Z2 = linkage(np.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean') +@array_api_compatible +def test_2x2_linkage(xp): + Z1 = linkage(xp.asarray([1]), method='single', metric='euclidean') + Z2 = linkage(xp.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean') assert_allclose(Z1, Z2) -def 
test_node_compare(): +@array_api_compatible +def test_node_compare(xp): np.random.seed(23) nobs = 50 X = np.random.randn(nobs, 4) + X = xp.asarray(X) Z = scipy.cluster.hierarchy.ward(X) tree = to_tree(Z) assert_(tree > tree.get_left()) @@ -1052,46 +1164,50 @@ def test_node_compare(): assert_(tree.get_right() != tree.get_left()) -def test_cut_tree(): +@array_api_compatible +def test_cut_tree(xp): np.random.seed(23) nobs = 50 X = np.random.randn(nobs, 4) + X = xp.asarray(X) Z = scipy.cluster.hierarchy.ward(X) cutree = cut_tree(Z) - assert_equal(cutree[:, 0], np.arange(nobs)) - assert_equal(cutree[:, -1], np.zeros(nobs)) - assert_equal(cutree.max(0), np.arange(nobs - 1, -1, -1)) + assert_allclose(cutree[:, 0], xp.arange(nobs)) + assert_allclose(cutree[:, -1], xp.zeros(nobs)) + assert_equal(np.asarray(cutree).max(0), np.arange(nobs - 1, -1, -1)) - assert_equal(cutree[:, [-5]], cut_tree(Z, n_clusters=5)) - assert_equal(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10])) - assert_equal(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5])) + assert_allclose(cutree[:, [-5]], cut_tree(Z, n_clusters=5)) + assert_allclose(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10])) + assert_allclose(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5])) nodes = _order_cluster_tree(Z) - heights = np.array([node.dist for node in nodes]) + heights = xp.asarray([node.dist for node in nodes]) - assert_equal(cutree[:, np.searchsorted(heights, [5])], - cut_tree(Z, height=5)) - assert_equal(cutree[:, np.searchsorted(heights, [5, 10])], - cut_tree(Z, height=[5, 10])) - assert_equal(cutree[:, np.searchsorted(heights, [10, 5])], - cut_tree(Z, height=[10, 5])) + assert_allclose(cutree[:, np.searchsorted(heights, [5])], + cut_tree(Z, height=5)) + assert_allclose(cutree[:, np.searchsorted(heights, [5, 10])], + cut_tree(Z, height=[5, 10])) + assert_allclose(cutree[:, np.searchsorted(heights, [10, 5])], + cut_tree(Z, height=[10, 5])) -def test_optimal_leaf_ordering(): +@array_api_compatible 
+def test_optimal_leaf_ordering(xp): # test with the distance vector y - Z = optimal_leaf_ordering(linkage(hierarchy_test_data.ytdist), - hierarchy_test_data.ytdist) + Z = optimal_leaf_ordering(linkage(xp.asarray(hierarchy_test_data.ytdist)), + xp.asarray(hierarchy_test_data.ytdist)) expectedZ = hierarchy_test_data.linkage_ytdist_single_olo assert_allclose(Z, expectedZ, atol=1e-10) # test with the observation matrix X - Z = optimal_leaf_ordering(linkage(hierarchy_test_data.X, 'ward'), - hierarchy_test_data.X) + Z = optimal_leaf_ordering(linkage(xp.asarray(hierarchy_test_data.X), 'ward'), + xp.asarray(hierarchy_test_data.X)) expectedZ = hierarchy_test_data.linkage_X_ward_olo assert_allclose(Z, expectedZ, atol=1e-06) +@skip_if_array_api def test_Heap(): values = np.array([2, -1, 0, -1.5, 3]) heap = Heap(values) From ea6f58b9f437c2f663baea2a5bb49f287018e924 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Mon, 12 Jun 2023 23:49:42 +0200 Subject: [PATCH 61/87] TST: fix conftest backend selection --- scipy/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/conftest.py b/scipy/conftest.py index 2c1459e021e8..11a410612428 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -130,7 +130,7 @@ def check_fpu_mode(request): # only select a subset of backend by filtering out the dict try: xp_available_backends = { - 'backend': xp_available_backends[backend] + backend: xp_available_backends[backend] for backend in SCIPY_ARRAY_API } except KeyError: From a33efc6891e37d97690c1dedc81161dc5907aad9 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 13 Jun 2023 00:13:36 +0200 Subject: [PATCH 62/87] BUG: fix dtype check for int --- scipy/cluster/hierarchy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index d048d193bf16..de6fd92b1d46 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,7 +134,7 @@ import numpy as np from . 
import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance -from scipy._lib._array_api import array_namespace, as_xparray, isdtype +from scipy._lib._array_api import array_namespace, as_xparray from scipy._lib._disjoint_set import DisjointSet @@ -4166,7 +4166,7 @@ def leaders(Z, T): xp = array_namespace(Z, T) Z = as_xparray(Z, order='c', xp=xp) T = as_xparray(T, order='c', xp=xp) - if not isdtype(T.dtype, kind='signed integer', xp=xp): + if T.dtype != xp.int64: raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') if len(T) != Z.shape[0] + 1: From 4dbde35687143f019019494f4ed1863e9eaeb105 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 13 Jun 2023 00:20:05 +0200 Subject: [PATCH 63/87] TST/MAINT: fix dot and coverage --- scipy/cluster/tests/test_vq.py | 96 +++++++++++++++++++++------------- scipy/cluster/vq.py | 19 +++++-- 2 files changed, 73 insertions(+), 42 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index c6ac5a4613c6..b9571cb0ea6c 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -15,7 +15,7 @@ skip_if_array_api, skip_if_array_api_gpu, array_api_compatible ) from scipy.sparse._sputils import matrix -from scipy._lib._array_api import SCIPY_ARRAY_API +from scipy._lib._array_api import SCIPY_ARRAY_API, as_xparray TESTDATA_2D = np.array([ @@ -148,30 +148,38 @@ def test_vq(self): assert_array_equal(label1, LABEL1) tlabel1, tdist = vq(tp(X), tp(initc)) - def test_vq_1d(self): + @array_api_compatible + def test_vq_1d(self, xp): # Test special rank 1 vq algo, python implementation. 
data = X[:, 0] initc = data[:3] a, b = _vq.vq(data, initc) + data = xp.asarray(data) + initc = xp.asarray(initc) ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis]) assert_array_equal(a, ta) assert_array_equal(b, tb) + @skip_if_array_api def test__vq_sametype(self): a = np.array([1.0, 2.0], dtype=np.float64) b = a.astype(np.float32) assert_raises(TypeError, _vq.vq, a, b) + @skip_if_array_api def test__vq_invalid_type(self): a = np.array([1, 2], dtype=int) assert_raises(TypeError, _vq.vq, a, a) - def test_vq_large_nfeat(self): + @array_api_compatible + def test_vq_large_nfeat(self, xp): X = np.random.rand(20, 20) code_book = np.random.rand(3, 20) codes0, dis0 = _vq.vq(X, code_book) - codes1, dis1 = py_vq(X, code_book) + codes1, dis1 = py_vq( + xp.asarray(X), xp.asarray(code_book) + ) assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) @@ -179,22 +187,28 @@ def test_vq_large_nfeat(self): code_book = code_book.astype(np.float32) codes0, dis0 = _vq.vq(X, code_book) - codes1, dis1 = py_vq(X, code_book) + codes1, dis1 = py_vq( + xp.asarray(X), xp.asarray(code_book) + ) assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) - def test_vq_large_features(self): + @array_api_compatible + def test_vq_large_features(self, xp): X = np.random.rand(10, 5) * 1000000 code_book = np.random.rand(2, 5) * 1000000 codes0, dis0 = _vq.vq(X, code_book) - codes1, dis1 = py_vq(X, code_book) + codes1, dis1 = py_vq( + xp.asarray(X), xp.asarray(code_book) + ) assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) class TestKMean: - def test_large_features(self): + @array_api_compatible + def test_large_features(self, xp): # Generate a data set with large values, and run kmeans on it to # (regression for 1077). 
d = 300 @@ -209,7 +223,7 @@ def test_large_features(self): data[:x.shape[0]] = x data[x.shape[0]:] = y - kmeans(data, np.asarray(2)) + kmeans(xp.asarray(data), xp.asarray(2)) @array_api_compatible def test_kmeans_simple(self, xp): @@ -220,12 +234,13 @@ def test_kmeans_simple(self, xp): code1 = kmeans(tp(X), tp(initc), iter=1)[0] assert_array_almost_equal(code1, CODET2) - def test_kmeans_lost_cluster(self): + @array_api_compatible + def test_kmeans_lost_cluster(self, xp): # This will cause kmeans to have a cluster with no points. - data = TESTDATA_2D - initk = np.array([[-1.8127404, -0.67128041], - [2.04621601, 0.07401111], - [-2.31149087, -0.05160469]]) + data = xp.asarray(TESTDATA_2D) + initk = xp.asarray([[-1.8127404, -0.67128041], + [2.04621601, 0.07401111], + [-2.31149087, -0.05160469]]) kmeans(data, initk) with suppress_warnings() as sup: @@ -248,31 +263,35 @@ def test_kmeans2_simple(self, xp): assert_array_almost_equal(code1, CODET1) assert_array_almost_equal(code2, CODET2) - def test_kmeans2_rank1(self): - data = TESTDATA_2D + @array_api_compatible + def test_kmeans2_rank1(self, xp): + data = xp.asarray(TESTDATA_2D) data1 = data[:, 0] initc = data1[:3] - code = initc.copy() + code = as_xparray(initc, copy=True, xp=xp) kmeans2(data1, code, iter=1)[0] kmeans2(data1, code, iter=2)[0] - def test_kmeans2_rank1_2(self): - data = TESTDATA_2D + @array_api_compatible + def test_kmeans2_rank1_2(self, xp): + data = xp.asarray(TESTDATA_2D) data1 = data[:, 0] - kmeans2(data1, np.asarray(2), iter=1) + kmeans2(data1, xp.asarray(2), iter=1) - def test_kmeans2_high_dim(self): + @array_api_compatible + def test_kmeans2_high_dim(self, xp): # test kmeans2 when the number of dimensions exceeds the number # of input points - data = TESTDATA_2D + data = xp.asarray(TESTDATA_2D) data = data.reshape((20, 20))[:10] - kmeans2(data, np.asarray(2)) + kmeans2(data, xp.asarray(2)) - def test_kmeans2_init(self): + @array_api_compatible + def test_kmeans2_init(self, xp): 
np.random.seed(12345) - data = TESTDATA_2D - k = np.asarray(3) + data = xp.asarray(TESTDATA_2D) + k = xp.asarray(3) kmeans2(data, k, minit='points') kmeans2(data[:, :1], k, minit='points') # special case (1-D) @@ -286,10 +305,11 @@ def test_kmeans2_init(self): kmeans2(data, k, minit='random') kmeans2(data[:, :1], k, minit='random') # special case (1-D) + @array_api_compatible @pytest.mark.skipif(sys.platform == 'win32', reason='Fails with MemoryError in Wine.') - def test_krandinit(self): - data = TESTDATA_2D + def test_krandinit(self, xp): + data = xp.asarray(TESTDATA_2D) datas = [data.reshape((200, 2)), data.reshape((20, 20))[:10]] k = int(1e6) for data in datas: @@ -299,14 +319,15 @@ def test_krandinit(self): else: rng = np.random.RandomState(1234) - init = _krandinit(data, k, rng, np) - orig_cov = np.cov(data, rowvar=0) - init_cov = np.cov(init, rowvar=0) + init = _krandinit(data, k, rng, xp) + orig_cov = xp.cov(data.T) + init_cov = xp.cov(init.T) assert_allclose(orig_cov, init_cov, atol=1e-2) - def test_kmeans2_empty(self): + @array_api_compatible + def test_kmeans2_empty(self, xp): # Regression test for gh-1032. 
- assert_raises(ValueError, kmeans2, np.asarray([]), np.asarray(2)) + assert_raises(ValueError, kmeans2, xp.asarray([]), xp.asarray(2)) @skip_if_array_api def test_kmeans_0k(self): @@ -323,12 +344,13 @@ def test_kmeans_large_thres(self, xp): assert_allclose(res[0], xp.asarray([4.])) assert_allclose(res[1], 2.3999999999999999) - def test_kmeans2_kpp_low_dim(self): + @array_api_compatible + def test_kmeans2_kpp_low_dim(self, xp): # Regression test for gh-11462 - prev_res = np.array([[-1.95266667, 0.898], - [-3.153375, 3.3945]]) + prev_res = xp.asarray([[-1.95266667, 0.898], + [-3.153375, 3.3945]]) np.random.seed(42) - res, _ = kmeans2(TESTDATA_2D, np.asarray(2), minit='++') + res, _ = kmeans2(xp.asarray(TESTDATA_2D), xp.asarray(2), minit='++') assert_allclose(res, prev_res) @skip_if_array_api_gpu diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 1b7bbfaaf1cd..19dfa8122747 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -514,7 +514,7 @@ def _kpoints(data, k, rng, xp): A 'k' by 'N' containing the initial centroids """ - idx = rng.choice(data.shape[0], size=k, replace=False) + idx = rng.choice(data.shape[0], size=int(k), replace=False) return data[idx] @@ -546,20 +546,29 @@ def _krandinit(data, k, rng, xp): if data.ndim == 1: cov = xp.cov(data) x = rng.standard_normal(size=k) + x = xp.asarray(x) x *= xp.sqrt(cov) elif data.shape[1] > data.shape[0]: # initialize when the covariance matrix is rank deficient _, s, vh = xp.linalg.svd(data - mu, full_matrices=False) x = rng.standard_normal(size=(k, size(s))) - sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - 1) - x = x.dot(sVh) + x = xp.asarray(x) + sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1)) + if xp.__name__ in {"array_api_compat.torch", "torch"}: + x = x.matmul(sVh) + else: + x = x.dot(sVh) else: - cov = np.atleast_2d(xp.cov(data, rowvar=False)) + cov = xp.atleast_2d(xp.cov(data.T)) # k rows, d cols (one row = one obs) # Generate k sample of a random variable ~ Gaussian(mu, cov) x 
= rng.standard_normal(size=(k, size(mu))) - x = x.dot(xp.linalg.cholesky(cov).T) + x = xp.asarray(x) + if xp.__name__ in {"array_api_compat.torch", "torch"}: + x = x.matmul(xp.linalg.cholesky(cov).T) + else: + x = x.dot(xp.linalg.cholesky(cov).T) x += mu return x From bef978c346e08e723b544534f4bb2bfded206211 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 13 Jun 2023 18:53:18 +0200 Subject: [PATCH 64/87] MAINT: fix matmul specialization --- scipy/cluster/vq.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 19dfa8122747..3c7b6df8eccd 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -554,10 +554,7 @@ def _krandinit(data, k, rng, xp): x = rng.standard_normal(size=(k, size(s))) x = xp.asarray(x) sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1)) - if xp.__name__ in {"array_api_compat.torch", "torch"}: - x = x.matmul(sVh) - else: - x = x.dot(sVh) + x = xp.matmul(x, sVh) else: cov = xp.atleast_2d(xp.cov(data.T)) @@ -565,10 +562,7 @@ def _krandinit(data, k, rng, xp): # Generate k sample of a random variable ~ Gaussian(mu, cov) x = rng.standard_normal(size=(k, size(mu))) x = xp.asarray(x) - if xp.__name__ in {"array_api_compat.torch", "torch"}: - x = x.matmul(xp.linalg.cholesky(cov).T) - else: - x = x.dot(xp.linalg.cholesky(cov).T) + x = xp.matmul(x, xp.linalg.cholesky(cov).T) x += mu return x From 60e3103e6c08ee476636caffeee378e346bb99af Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 13 Jun 2023 18:53:35 +0200 Subject: [PATCH 65/87] TST: add more mps skipping --- scipy/cluster/tests/test_hierarchy.py | 19 ++++++++++++++++++- scipy/cluster/tests/test_vq.py | 5 +++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index 9293b1bcd457..7cc6bfaec9b8 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -47,7 +47,9 @@ 
_order_cluster_tree, _hierarchy, _LINKAGE_METHODS) from scipy.spatial.distance import pdist from scipy.cluster._hierarchy import Heap -from scipy.conftest import skip_if_array_api, array_api_compatible +from scipy.conftest import ( + skip_if_array_api, array_api_compatible, skip_if_array_api_gpu +) from . import hierarchy_test_data @@ -399,6 +401,7 @@ def test_is_valid_linkage_int_type(self, xp): assert_(is_valid_linkage(Z) is False) assert_raises(TypeError, is_valid_linkage, Z, throw=True) + @skip_if_array_api_gpu @array_api_compatible def test_is_valid_linkage_empty(self, xp): # Tests is_valid_linkage(Z) with empty linkage. @@ -489,6 +492,7 @@ def check_is_valid_im_various_size(self, nrow, ncol, valid, xp): if not valid: assert_raises(ValueError, is_valid_im, R, throw=True) + @skip_if_array_api_gpu @array_api_compatible def test_is_valid_im_empty(self, xp): # Tests is_valid_im(R) with empty inconsistency matrix. @@ -513,6 +517,7 @@ def test_is_valid_im_4_and_up_neg_index_left(self, xp): # (step size 3) with negative link height means. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) R[i//2,0] = -2.0 @@ -547,6 +552,7 @@ def test_is_valid_im_4_and_up_neg_dist(self, xp): class TestNumObsLinkage: + @skip_if_array_api_gpu @array_api_compatible def test_num_obs_linkage_empty(self, xp): # Tests num_obs_linkage(Z) with empty linkage. @@ -617,6 +623,7 @@ def test_Q_subtree_pre_order(self, xp): class TestCorrespond: + @skip_if_array_api_gpu @array_api_compatible def test_correspond_empty(self, xp): # Tests correspond(Z, y) with empty linkage and condensed distance matrix. @@ -681,6 +688,7 @@ def test_num_obs_linkage_multi_matrix(self, xp): class TestIsMonotonic: + @skip_if_array_api_gpu @array_api_compatible def test_is_monotonic_empty(self, xp): # Tests is_monotonic(Z) on an empty linkage. 
@@ -764,12 +772,14 @@ def test_is_monotonic_Q_linkage(self, xp): class TestMaxDists: + @skip_if_array_api_gpu @array_api_compatible def test_maxdists_empty_linkage(self, xp): # Tests maxdists(Z) on empty linkage. Expecting exception. Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxdists, Z) + @skip_if_array_api_gpu @array_api_compatible def test_maxdists_one_cluster_linkage(self, xp): # Tests maxdists(Z) on linkage with one cluster. @@ -778,6 +788,7 @@ def test_maxdists_one_cluster_linkage(self, xp): expectedMD = calculate_maximum_distances(Z, xp) assert_allclose(MD, expectedMD, atol=1e-15) + @skip_if_array_api_gpu @array_api_compatible def test_maxdists_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: @@ -793,6 +804,7 @@ def check_maxdists_Q_linkage(self, method, xp): class TestMaxInconsts: + @skip_if_array_api_gpu @array_api_compatible def test_maxinconsts_empty_linkage(self, xp): # Tests maxinconsts(Z, R) on empty linkage. Expecting exception. @@ -809,6 +821,7 @@ def test_maxinconsts_difrow_linkage(self, xp): R = xp.asarray(R) assert_raises(ValueError, maxinconsts, Z, R) + @skip_if_array_api_gpu @array_api_compatible def test_maxinconsts_one_cluster_linkage(self, xp): # Tests maxinconsts(Z, R) on linkage with one cluster. 
@@ -818,6 +831,7 @@ def test_maxinconsts_one_cluster_linkage(self, xp): expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp) assert_allclose(MD, expectedMD, atol=1e-15) + @skip_if_array_api_gpu @array_api_compatible def test_maxinconsts_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: @@ -848,6 +862,7 @@ def check_maxRstat_invalid_index(self, i, xp): else: assert_raises(TypeError, maxRstat, Z, R, i) + @skip_if_array_api_gpu @array_api_compatible def test_maxRstat_empty_linkage(self, xp): for i in range(4): @@ -872,6 +887,7 @@ def check_maxRstat_difrow_linkage(self, i, xp): R = xp.asarray(R) assert_raises(ValueError, maxRstat, Z, R, i) + @skip_if_array_api_gpu @array_api_compatible def test_maxRstat_one_cluster_linkage(self, xp): for i in range(4): @@ -1164,6 +1180,7 @@ def test_node_compare(xp): assert_(tree.get_right() != tree.get_left()) +@skip_if_array_api_gpu @array_api_compatible def test_cut_tree(xp): np.random.seed(23) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index b9571cb0ea6c..7304d2cdd553 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -148,6 +148,7 @@ def test_vq(self): assert_array_equal(label1, LABEL1) tlabel1, tdist = vq(tp(X), tp(initc)) + @skip_if_array_api_gpu @array_api_compatible def test_vq_1d(self, xp): # Test special rank 1 vq algo, python implementation. 
@@ -171,6 +172,7 @@ def test__vq_invalid_type(self): a = np.array([1, 2], dtype=int) assert_raises(TypeError, _vq.vq, a, a) + @skip_if_array_api_gpu @array_api_compatible def test_vq_large_nfeat(self, xp): X = np.random.rand(20, 20) @@ -193,6 +195,7 @@ def test_vq_large_nfeat(self, xp): assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) + @skip_if_array_api_gpu @array_api_compatible def test_vq_large_features(self, xp): X = np.random.rand(10, 5) * 1000000 @@ -287,6 +290,7 @@ def test_kmeans2_high_dim(self, xp): data = data.reshape((20, 20))[:10] kmeans2(data, xp.asarray(2)) + @skip_if_array_api_gpu @array_api_compatible def test_kmeans2_init(self, xp): np.random.seed(12345) @@ -344,6 +348,7 @@ def test_kmeans_large_thres(self, xp): assert_allclose(res[0], xp.asarray([4.])) assert_allclose(res[1], 2.3999999999999999) + @skip_if_array_api_gpu @array_api_compatible def test_kmeans2_kpp_low_dim(self, xp): # Regression test for gh-11462 From b201851ef45f4ef599525e7c5150d5d32cf31413 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 14 Jun 2023 17:23:05 +0200 Subject: [PATCH 66/87] CI: fix pytorch version --- .github/workflows/array_api.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 87918f482e67..89c5771040d3 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -23,7 +23,7 @@ concurrency: jobs: pytorch_cpu: name: Linux PyTorch CPU - # if: "github.repository == 'scipy/scipy' || github.repository == ''" + if: "github.repository == 'scipy/scipy' || github.repository == ''" runs-on: ubuntu-22.04 strategy: matrix: @@ -54,7 +54,7 @@ jobs: python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch # Packages for Array API testing python -m pip install array-api-compat - python -m pip install torch --index-url 
https://download.pytorch.org/whl/cpu + python -m pip install torch<2.1 --index-url https://download.pytorch.org/whl/cpu - name: Prepare compiler cache id: prep-ccache @@ -82,6 +82,6 @@ jobs: run: | export OMP_NUM_THREADS=2 export SCIPY_USE_PROPACK=1 - # remove -s cluster for the real PR probably + # expand as new modules are added python dev.py --no-build test --array-api-backend pytorch --array-api-backend numpy -s cluster -- --durations 10 --timeout=60 python dev.py --no-build test --array-api-backend all --tests scipy/_lib/tests/test_array_api.py From 0f022a1bb0e544611f248847a8cfb0e193333cba Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 14 Jun 2023 17:23:44 +0200 Subject: [PATCH 67/87] TST/MAINT: some rtol and type adjustments [skip ci] --- scipy/cluster/hierarchy.py | 12 +++---- scipy/cluster/tests/test_hierarchy.py | 47 ++++++++++++++------------- scipy/cluster/tests/test_vq.py | 2 +- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index de6fd92b1d46..b1d11d0067d6 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1694,7 +1694,7 @@ def cophenet(Z, Y=None): numerator = (Yy * Zz) denomA = Yy**2 denomB = Zz**2 - c = numerator.sum() / xp.sqrt(denomA.sum() * denomB.sum()) + c = xp.sum(numerator) / xp.sqrt(xp.sum(denomA) * xp.sum(denomB)) return (c, zz) @@ -2588,7 +2588,7 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): # Since the C code does not support striding using strides. # The dimensions are used instead. 
[R] = _copy_arrays_if_base_present([R]) - R = np.asarray(R) + R = np.asarray(R) _hierarchy.cluster_in(Z, R, T, float(t), int(n)) elif criterion == 'distance': _hierarchy.cluster_dist(Z, T, float(t), int(n)) @@ -4166,7 +4166,7 @@ def leaders(Z, T): xp = array_namespace(Z, T) Z = as_xparray(Z, order='c', xp=xp) T = as_xparray(T, order='c', xp=xp) - if T.dtype != xp.int64: + if T.dtype != xp.int32: raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') if len(T) != Z.shape[0] + 1: @@ -4174,12 +4174,12 @@ def leaders(Z, T): Cl = np.unique(T) kk = len(Cl) - L = np.zeros((kk,), dtype='i') - M = np.zeros((kk,), dtype='i') + L = np.zeros((kk,), dtype=np.int32) + M = np.zeros((kk,), dtype=np.int32) n = Z.shape[0] + 1 [Z, T] = _copy_arrays_if_base_present([Z, T]) Z = np.asarray(Z) - T = np.asarray(T, dtype='i') + T = np.asarray(T, dtype=np.int32) s = _hierarchy.leaders(Z, T, L, M, int(kk), int(n)) if s >= 0: raise ValueError(('T is not a valid assignment vector. Error found ' diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index 7cc6bfaec9b8..7ba0d000f386 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -204,8 +204,8 @@ def test_mlab_linkage_conversion_single_row(self, xp): # Tests from/to_mlab_linkage on linkage array with single row. 
Z = xp.asarray([[0., 1., 3., 2.]]) Zm = xp.asarray([[1, 2, 3]]) - assert_allclose(from_mlab_linkage(Zm), Z) - assert_allclose(to_mlab_linkage(Z), Zm) + assert_allclose(from_mlab_linkage(Zm), Z, rtol=1e-15) + assert_allclose(to_mlab_linkage(Z), Zm, rtol=1e-15) @array_api_compatible def test_mlab_linkage_conversion_multiple_rows(self, xp): @@ -218,8 +218,8 @@ def test_mlab_linkage_conversion_multiple_rows(self, xp): [1., 8., 268., 4.], [6., 9., 295., 6.]], dtype=xp.float64) - assert_allclose(from_mlab_linkage(Zm), Z) - assert_allclose(to_mlab_linkage(Z), Zm) + assert_allclose(from_mlab_linkage(Zm), Z, rtol=1e-15) + assert_allclose(to_mlab_linkage(Z), Zm, rtol=1e-15) class TestFcluster: @@ -287,9 +287,9 @@ def test_leaders_single(self, xp): Z = linkage(Y) T = fcluster(Z, criterion='maxclust', t=3) Lright = (xp.asarray([53, 55, 56]), xp.asarray([2, 3, 1])) - T = xp.asarray(T, dtype=xp.int64) + T = xp.asarray(T, dtype=xp.int32) L = leaders(Z, T) - assert_allclose(xp.concatenate(L), xp.concatenate(Lright)) + assert_allclose(xp.concatenate(L), xp.concatenate(Lright), rtol=1e-15) class TestIsIsomorphic: @@ -589,7 +589,7 @@ def test_leaves_list_1x4(self, xp): # Tests leaves_list(Z) on a 1x4 linkage. 
Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64) to_tree(Z) - assert_allclose(leaves_list(Z), [0, 1]) + assert_allclose(leaves_list(Z), [0, 1], rtol=1e-15) @array_api_compatible def test_leaves_list_2x4(self, xp): @@ -597,7 +597,7 @@ def test_leaves_list_2x4(self, xp): Z = xp.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=xp.float64) to_tree(Z) - assert_allclose(leaves_list(Z), [0, 1, 2]) + assert_allclose(leaves_list(Z), [0, 1, 2], rtol=1e-15) @array_api_compatible def test_leaves_list_Q(self, xp): @@ -610,7 +610,7 @@ def check_leaves_list_Q(self, method, xp): X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) node = to_tree(Z) - assert_allclose(node.pre_order(), leaves_list(Z)) + assert_allclose(node.pre_order(), leaves_list(Z), rtol=1e-15) @array_api_compatible def test_Q_subtree_pre_order(self, xp): @@ -619,7 +619,8 @@ def test_Q_subtree_pre_order(self, xp): Z = linkage(X, 'single') node = to_tree(Z) assert_allclose(node.pre_order(), (node.get_left().pre_order() - + node.get_right().pre_order())) + + node.get_right().pre_order()), + rtol=1e-15) class TestCorrespond: @@ -1003,8 +1004,8 @@ def check_dendrogram_plot(self, orientation, xp): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_allclose(testlabel.get_rotation(), 90) - assert_allclose(testlabel.get_size(), 20) + assert_allclose(testlabel.get_rotation(), 90, rtol=1e-15) + assert_allclose(testlabel.get_size(), 20, rtol=1e-15) dendrogram(Z, ax=ax, orientation=orientation, leaf_rotation=90) testlabel = ( @@ -1012,7 +1013,7 @@ def check_dendrogram_plot(self, orientation, xp): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_allclose(testlabel.get_rotation(), 90) + assert_allclose(testlabel.get_rotation(), 90, rtol=1e-15) dendrogram(Z, ax=ax, orientation=orientation, leaf_font_size=20) testlabel = ( @@ -1020,7 +1021,7 @@ def check_dendrogram_plot(self, orientation, xp): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - 
assert_allclose(testlabel.get_size(), 20) + assert_allclose(testlabel.get_size(), 20, rtol=1e-15) plt.close() # test plotting to gca (will import pylab) @@ -1163,7 +1164,7 @@ def test_euclidean_linkage_value_error(xp): def test_2x2_linkage(xp): Z1 = linkage(xp.asarray([1]), method='single', metric='euclidean') Z2 = linkage(xp.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean') - assert_allclose(Z1, Z2) + assert_allclose(Z1, Z2, rtol=1e-15) @array_api_compatible @@ -1190,23 +1191,23 @@ def test_cut_tree(xp): Z = scipy.cluster.hierarchy.ward(X) cutree = cut_tree(Z) - assert_allclose(cutree[:, 0], xp.arange(nobs)) - assert_allclose(cutree[:, -1], xp.zeros(nobs)) + assert_allclose(cutree[:, 0], xp.arange(nobs), rtol=1e-15) + assert_allclose(cutree[:, -1], xp.zeros(nobs), rtol=1e-15) assert_equal(np.asarray(cutree).max(0), np.arange(nobs - 1, -1, -1)) - assert_allclose(cutree[:, [-5]], cut_tree(Z, n_clusters=5)) - assert_allclose(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10])) - assert_allclose(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5])) + assert_allclose(cutree[:, [-5]], cut_tree(Z, n_clusters=5), rtol=1e-15) + assert_allclose(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10]), rtol=1e-15) + assert_allclose(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5]), rtol=1e-15) nodes = _order_cluster_tree(Z) heights = xp.asarray([node.dist for node in nodes]) assert_allclose(cutree[:, np.searchsorted(heights, [5])], - cut_tree(Z, height=5)) + cut_tree(Z, height=5), rtol=1e-15) assert_allclose(cutree[:, np.searchsorted(heights, [5, 10])], - cut_tree(Z, height=[5, 10])) + cut_tree(Z, height=[5, 10]), rtol=1e-15) assert_allclose(cutree[:, np.searchsorted(heights, [10, 5])], - cut_tree(Z, height=[10, 5])) + cut_tree(Z, height=[10, 5]), rtol=1e-15) @array_api_compatible diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 7304d2cdd553..5f2bf671dd61 100644 --- a/scipy/cluster/tests/test_vq.py +++ 
b/scipy/cluster/tests/test_vq.py @@ -343,7 +343,7 @@ def test_kmeans_0k(self): @array_api_compatible def test_kmeans_large_thres(self, xp): # Regression test for gh-1774 - x = xp.asarray([1, 2, 3, 4, 10], dtype=float) + x = xp.asarray([1, 2, 3, 4, 10], dtype=xp.float64) res = kmeans(x, xp.asarray(1), thresh=1e16) assert_allclose(res[0], xp.asarray([4.])) assert_allclose(res[1], 2.3999999999999999) From 226ac88ff86f212234a1d352a3c61a83546c1ca5 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 16 Jun 2023 14:00:49 +0200 Subject: [PATCH 68/87] MAINT/TST: fix some type conversions and methods to functions --- scipy/_lib/_array_api.py | 1 - scipy/cluster/hierarchy.py | 96 +++++++++++++++------------ scipy/cluster/tests/test_hierarchy.py | 20 +++--- scipy/cluster/tests/test_vq.py | 4 +- scipy/cluster/vq.py | 20 +++--- 5 files changed, 77 insertions(+), 64 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 8bd74b392b43..5201474b2953 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -6,7 +6,6 @@ The SciPy use case of the Array API is described on the following page: https://data-apis.org/array-api/latest/use_cases.html#use-case-scipy """ -import math import os import numpy as np diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index b1d11d0067d6..f5225c3aa9d0 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1027,7 +1027,7 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): raise ValueError("Method '{}' requires the distance metric " "to be Euclidean".format(method)) if y.shape[0] == y.shape[1] and np.allclose(np.diag(y), 0): - if xp.all(y >= 0) and xp.allclose(y, y.T): + if xp.all(y >= 0) and np.allclose(y, y.T): _warning('The symmetric non-negative hollow observation ' 'matrix looks suspiciously like an uncondensed ' 'distance matrix') @@ -1361,7 +1361,7 @@ def cut_tree(Z, n_clusters=None, height=None): n_cols = 1 cols_idx = 
xp.asarray([cols_idx]) - groups = xp.zeros((n_cols, nobs), dtype=int) + groups = xp.zeros((n_cols, nobs), dtype=xp.int64) last_group = xp.arange(nobs) if 0 in cols_idx: groups[0] = last_group @@ -1369,8 +1369,8 @@ def cut_tree(Z, n_clusters=None, height=None): for i, node in enumerate(nodes): idx = node.pre_order() this_group = as_xparray(last_group, copy=True, xp=xp) - this_group[idx] = last_group[idx].min() - this_group[this_group > last_group[idx].max()] -= 1 + this_group[idx] = xp.min(last_group[idx]) + this_group[this_group > xp.max(last_group[idx])] -= 1 if i + 1 in cols_idx: groups[np.nonzero(i + 1 == cols_idx)[0]] = this_group last_group = this_group @@ -1438,7 +1438,8 @@ def to_tree(Z, rd=False): 9 """ - Z = as_xparray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') # Number of original objects is equal to the number of rows plus 1. @@ -1453,9 +1454,11 @@ def to_tree(Z, rd=False): nd = None - for i, row in enumerate(Z): - fi = int(row[0]) - fj = int(row[1]) + for i in range(Z.shape[0]): + row = Z[i, :] + + fi = xp.astype(row[0], xp.int64) + fj = xp.astype(row[1], xp.int64) if fi > i + n: raise ValueError(('Corrupt matrix Z. Index to derivative cluster ' 'is used before it is formed. See row %d, ' @@ -1687,8 +1690,8 @@ def cophenet(Z, Y=None): Y = as_xparray(Y, order='c', xp=xp) distance.is_valid_y(Y, throw=True, name='Y') - z = zz.mean() - y = Y.mean() + z = xp.mean(zz) + y = xp.mean(Y) Yy = Y - y Zz = zz - z numerator = (Yy * Zz) @@ -1861,7 +1864,7 @@ def from_mlab_linkage(Z): return as_xparray(Z, copy=True, xp=xp) Zpart = as_xparray(Z, copy=True, xp=xp) - if Zpart[:, 0:2].min() != 1.0 and Zpart[:, 0:2].max() != 2 * Zs[0]: + if xp.min(Zpart[:, 0:2]) != 1.0 and xp.max(Zpart[:, 0:2]) != 2 * Zs[0]: raise ValueError('The format of the indices is not 1..N') Zpart[:, 0:2] -= 1.0 @@ -2038,11 +2041,12 @@ def is_monotonic(Z): increasing order. 
""" - Z = as_xparray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') # We expect the i'th value to be greater than its successor. - return (Z[1:, 2] >= Z[:-1, 2]).all() + return xp.all(Z[1:, 2] >= Z[:-1, 2]) def is_valid_im(R, warning=False, throw=False, name=None): @@ -2149,13 +2153,13 @@ def is_valid_im(R, warning=False, throw=False, name=None): if R.shape[0] < 1: raise ValueError('Inconsistency matrix %smust have at least one ' 'row.' % name_str) - if (R[:, 0] < 0).any(): + if xp.any(R[:, 0] < 0): raise ValueError('Inconsistency matrix %scontains negative link ' 'height means.' % name_str) - if (R[:, 1] < 0).any(): + if xp.any(R[:, 1] < 0): raise ValueError('Inconsistency matrix %scontains negative link ' 'height standard deviations.' % name_str) - if (R[:, 2] < 0).any(): + if xp.any(R[:, 2] < 0): raise ValueError('Inconsistency matrix %scontains negative link ' 'counts.' % name_str) except Exception as e: @@ -2266,13 +2270,13 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): 'observations.') n = Z.shape[0] if n > 1: - if ((Z[:, 0] < 0).any() or (Z[:, 1] < 0).any()): + if (xp.any(Z[:, 0] < 0) or xp.any(Z[:, 1] < 0)): raise ValueError('Linkage %scontains negative indices.' % name_str) - if (Z[:, 2] < 0).any(): + if xp.any(Z[:, 2] < 0): raise ValueError('Linkage %scontains negative distances.' % name_str) - if (Z[:, 3] < 0).any(): + if xp.any(Z[:, 3] < 0): raise ValueError('Linkage %scontains negative counts.' 
% name_str) if _check_hierarchy_uses_cluster_before_formed(Z): @@ -2303,10 +2307,10 @@ def _check_hierarchy_uses_cluster_more_than_once(Z): n = Z.shape[0] + 1 chosen = set() for i in range(0, n - 1): - if (Z[i, 0] in chosen) or (Z[i, 1] in chosen) or Z[i, 0] == Z[i, 1]: + if (float(Z[i, 0]) in chosen) or (float(Z[i, 1]) in chosen) or Z[i, 0] == Z[i, 1]: return True - chosen.add(Z[i, 0]) - chosen.add(Z[i, 1]) + chosen.add(float(Z[i, 0])) + chosen.add(float(Z[i, 1])) return False @@ -2570,6 +2574,8 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') + t = xp.asarray(t) + n = Z.shape[0] + 1 T = np.zeros((n,), dtype='i') @@ -2593,13 +2599,15 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): elif criterion == 'distance': _hierarchy.cluster_dist(Z, T, float(t), int(n)) elif criterion == 'maxclust': - _hierarchy.cluster_maxclust_dist(Z, T, int(n), int(t)) + t_ = xp.astype(t, xp.int64) + _hierarchy.cluster_maxclust_dist(Z, T, int(n), t_) elif criterion == 'monocrit': [monocrit] = _copy_arrays_if_base_present([monocrit]) _hierarchy.cluster_monocrit(Z, monocrit, T, float(t), int(n)) elif criterion == 'maxclust_monocrit': [monocrit] = _copy_arrays_if_base_present([monocrit]) - _hierarchy.cluster_maxclust_monocrit(Z, monocrit, T, int(n), int(t)) + t_ = xp.astype(t, xp.int64) + _hierarchy.cluster_maxclust_monocrit(Z, monocrit, T, int(n), t_) else: raise ValueError('Invalid cluster formation criterion: %s' % str(criterion)) @@ -3299,8 +3307,13 @@ def llf(id): raise ValueError("orientation must be one of 'top', 'left', " "'bottom', or 'right'") - if labels is not None and Z.shape[0] + 1 != len(labels): - raise ValueError("Dimensions of Z and labels must be consistent.") + if labels is not None: + try: + len_labels = len(labels) + except (TypeError, AttributeError): + len_labels = labels.shape[0] + if Z.shape[0] + 1 != len_labels: + raise 
ValueError("Dimensions of Z and labels must be consistent.") is_valid_linkage(Z, throw=True, name='Z') Zs = Z.shape @@ -3432,7 +3445,7 @@ def _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, def _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, - i, labels, show_leaf_counts): + i, labels, show_leaf_counts, xp): # If the leaf id structure is not None and is a list then the caller # to dendrogram has indicated that cluster id's corresponding to the # leaf nodes should be recorded. @@ -3444,21 +3457,21 @@ def _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, ivl.append(leaf_label_func(int(i))) else: if show_leaf_counts: - ivl.append("(" + str(int(Z[i - n, 3])) + ")") + ivl.append("(" + str(xp.astype(Z[i - n, 3], xp.int64)) + ")") else: ivl.append("") -def _append_contraction_marks(Z, iv, i, n, contraction_marks): - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 0]), n, contraction_marks) - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 1]), n, contraction_marks) +def _append_contraction_marks(Z, iv, i, n, contraction_marks, xp): + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 0], xp.int64), n, contraction_marks, xp) + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 1], xp.int64), n, contraction_marks, xp) -def _append_contraction_marks_sub(Z, iv, i, n, contraction_marks): +def _append_contraction_marks_sub(Z, iv, i, n, contraction_marks, xp): if i >= n: contraction_marks.append((iv, Z[i - n, 2])) - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 0]), n, contraction_marks) - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 1]), n, contraction_marks) + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 0], xp.int64), n, contraction_marks, xp) + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 1], xp.int64), n, contraction_marks, xp) def _dendrogram_calculate_info(Z, p, truncate_mode, @@ -3510,6 +3523,7 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, the 
target node. """ + xp = array_namespace(Z) if n == 0: raise ValueError("Invalid singleton cluster count n.") @@ -3524,9 +3538,9 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, d = Z[i - n, 2] _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, i, labels, - show_leaf_counts) + show_leaf_counts, xp) if contraction_marks is not None: - _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks) + _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) return (iv + 5.0, 10.0, 0.0, d) elif i < n: _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, @@ -3537,9 +3551,9 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, d = Z[i - n, 2] _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, i, labels, - show_leaf_counts) + show_leaf_counts, xp) if contraction_marks is not None: - _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks) + _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) return (iv + 5.0, 10.0, 0.0, d) elif i < n: _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, @@ -3557,8 +3571,8 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, # !!! Otherwise, we don't have a leaf node, so work on plotting a # non-leaf node. 
# Actual indices of a and b - aa = int(Z[i - n, 0]) - ab = int(Z[i - n, 1]) + aa = xp.astype(Z[i - n, 0], xp.int64) + ab = xp.astype(Z[i - n, 1], xp.int64) if aa >= n: # The number of singletons below cluster a na = Z[aa - n, 3] @@ -4169,7 +4183,7 @@ def leaders(Z, T): if T.dtype != xp.int32: raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') - if len(T) != Z.shape[0] + 1: + if T.shape[0] != Z.shape[0] + 1: raise ValueError('Mismatch: len(T)!=Z.shape[0] + 1.') Cl = np.unique(T) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index 7ba0d000f386..bed7679224ef 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -289,7 +289,7 @@ def test_leaders_single(self, xp): Lright = (xp.asarray([53, 55, 56]), xp.asarray([2, 3, 1])) T = xp.asarray(T, dtype=xp.int32) L = leaders(Z, T) - assert_allclose(xp.concatenate(L), xp.concatenate(Lright), rtol=1e-15) + assert_allclose(np.concatenate(L), np.concatenate(Lright), rtol=1e-15) class TestIsIsomorphic: @@ -397,7 +397,7 @@ def check_is_valid_linkage_various_size(self, nrow, ncol, valid, xp): def test_is_valid_linkage_int_type(self, xp): # Tests is_valid_linkage(Z) with integer type. Z = xp.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=int) + [3, 2, 4.0, 3]], dtype=xp.int64) assert_(is_valid_linkage(Z) is False) assert_raises(TypeError, is_valid_linkage, Z, throw=True) @@ -473,7 +473,7 @@ class TestIsValidInconsistent: def test_is_valid_im_int_type(self, xp): # Tests is_valid_im(R) with integer type. 
R = xp.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=int) + [3, 2, 4.0, 3]], dtype=xp.int64) assert_(is_valid_im(R) is False) assert_raises(TypeError, is_valid_im, R, throw=True) @@ -938,7 +938,7 @@ def test_labels_as_array_or_list(self, xp): Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') labels = xp.asarray([1, 3, 2, 6, 4, 5]) result1 = dendrogram(Z, labels=labels, no_plot=True) - result2 = dendrogram(Z, labels=labels.tolist(), no_plot=True) + result2 = dendrogram(Z, labels=list(labels), no_plot=True) assert result1 == result2 @array_api_compatible @@ -1122,11 +1122,11 @@ def calculate_maximum_distances(Z, xp): left = Z[i, 0] right = Z[i, 1] if left >= n: - q[0] = B[int(left) - n] + q[0] = B[xp.astype(left, xp.int64) - n] if right >= n: - q[1] = B[int(right) - n] + q[1] = B[xp.astype(right, xp.int64) - n] q[2] = Z[i, 2] - B[i] = q.max() + B[i] = xp.max(q) return B @@ -1140,11 +1140,11 @@ def calculate_maximum_inconsistencies(Z, R, k=3, xp=np): left = Z[i, 0] right = Z[i, 1] if left >= n: - q[0] = B[int(left) - n] + q[0] = B[xp.asarray(left, dtype=xp.int64) - n] if right >= n: - q[1] = B[int(right) - n] + q[1] = B[xp.asarray(right, dtype=xp.int64) - n] q[2] = R[i, k] - B[i] = q.max() + B[i] = xp.max(q) return B diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 5f2bf671dd61..63bb72a9de81 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -287,7 +287,7 @@ def test_kmeans2_high_dim(self, xp): # test kmeans2 when the number of dimensions exceeds the number # of input points data = xp.asarray(TESTDATA_2D) - data = data.reshape((20, 20))[:10] + data = xp.reshape(data, (20, 20))[:10, :] kmeans2(data, xp.asarray(2)) @skip_if_array_api_gpu @@ -314,7 +314,7 @@ def test_kmeans2_init(self, xp): reason='Fails with MemoryError in Wine.') def test_krandinit(self, xp): data = xp.asarray(TESTDATA_2D) - datas = [data.reshape((200, 2)), data.reshape((20, 20))[:10]] + datas = [xp.reshape(data, (200, 
2)), xp.reshape(data, (20, 20))[:10, :]] k = int(1e6) for data in datas: # check that np.random.Generator can be used (numpy >= 1.17) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 3c7b6df8eccd..70b025e347e8 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -133,7 +133,7 @@ def whiten(obs, check_finite=True): obs = as_xparray(obs, check_finite=check_finite, xp=xp) std_dev = xp.std(obs, axis=0) zero_std_mask = std_dev == 0 - if zero_std_mask.any(): + if xp.any(zero_std_mask): std_dev[zero_std_mask] = 1.0 warnings.warn("Some columns have standard deviation zero. " "The values of these columns will not change.", @@ -308,7 +308,7 @@ def _kmeans(obs, guess, thresh=1e-5, xp=None): while diff > thresh: # compute membership and distances between obs and code_book obs_code, distort = vq(obs, code_book, check_finite=False) - prev_avg_dists.append(distort.mean(axis=-1)) + prev_avg_dists.append(xp.mean(distort, axis=-1)) # recalc code_book as centroids of associated obs obs = np.asarray(obs) obs_code = np.asarray(obs_code) @@ -319,7 +319,7 @@ def _kmeans(obs, guess, thresh=1e-5, xp=None): code_book = xp.asarray(code_book) has_members = xp.asarray(has_members) code_book = code_book[has_members] - diff = xp.absolute(prev_avg_dists[0] - prev_avg_dists[1]) + diff = xp.abs(prev_avg_dists[0] - prev_avg_dists[1]) return code_book, prev_avg_dists[1] @@ -515,7 +515,7 @@ def _kpoints(data, k, rng, xp): """ idx = rng.choice(data.shape[0], size=int(k), replace=False) - return data[idx] + return data[idx, ...] 
def _krandinit(data, k, rng, xp): @@ -541,7 +541,7 @@ def _krandinit(data, k, rng, xp): A 'k' by 'N' containing the initial centroids """ - mu = data.mean(axis=0) + mu = xp.mean(data, axis=0) if data.ndim == 1: cov = xp.cov(data) @@ -553,7 +553,7 @@ def _krandinit(data, k, rng, xp): _, s, vh = xp.linalg.svd(data - mu, full_matrices=False) x = rng.standard_normal(size=(k, size(s))) x = xp.asarray(x) - sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1)) + sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1.)) x = xp.matmul(x, sVh) else: cov = xp.atleast_2d(xp.cov(data.T)) @@ -599,13 +599,13 @@ def _kpp(data, k, rng, xp): # k should be an integer, NOT a NumPy # scalar array thing... if not isinstance(k, int): - k = k.item() + k = xp.astype(k, xp.int64) init = xp.empty((k, dims)) for i in range(k): if i == 0: - init[i, :] = data[rng_integers(rng, data.shape[0])] + init[i, :] = data[rng_integers(rng, data.shape[0]), :] else: D2 = cdist(init[:i,:], data, metric='sqeuclidean').min(axis=0) @@ -613,7 +613,7 @@ def _kpp(data, k, rng, xp): cumprobs = probs.cumsum() r = rng.uniform() cumprobs = np.asarray(cumprobs) - init[i, :] = data[np.searchsorted(cumprobs, r)] + init[i, :] = data[np.searchsorted(cumprobs, r), :] return init @@ -786,7 +786,7 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', if minit == 'matrix' or size(code_book) > 1: if data.ndim != code_book.ndim: raise ValueError("k array doesn't match data rank") - nc = len(code_book) + nc = code_book.shape[0] if data.ndim > 1 and code_book.shape[1] != d: raise ValueError("k array doesn't match data dimension") else: From f7f25680a2f8ebaca2ed5aae2a7f2beca1f860dc Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 16 Jun 2023 14:40:05 +0200 Subject: [PATCH 69/87] ENH: add convenient function atleast_nd --- scipy/_lib/_array_api.py | 9 +++++++++ scipy/cluster/hierarchy.py | 1 + scipy/cluster/vq.py | 7 +++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git 
a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index 5201474b2953..fcee233c1287 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -174,6 +174,15 @@ def as_xparray_namespace(*arrays): return *arrays, xp +def atleast_nd(x, *, ndim, xp): + """Recursively expand the dimension to have at least `ndim`.""" + x = xp.asarray(x) + if x.ndim < ndim: + x = xp.expand_dims(x, axis=0) + x = atleast_nd(x, ndim=ndim, xp=xp) + return x + + def to_numpy(array, xp): """Convert `array` into a NumPy ndarray on the CPU. diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index f5225c3aa9d0..a14d5b563a5d 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -1369,6 +1369,7 @@ def cut_tree(Z, n_clusters=None, height=None): for i, node in enumerate(nodes): idx = node.pre_order() this_group = as_xparray(last_group, copy=True, xp=xp) + # TODO ARRAY_API complex indexing not supported this_group[idx] = xp.min(last_group[idx]) this_group[this_group > xp.max(last_group[idx])] -= 1 if i + 1 in cols_idx: diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 70b025e347e8..54cc74aceb25 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -67,7 +67,9 @@ import warnings import numpy as np from collections import deque -from scipy._lib._array_api import as_xparray, array_namespace, size, isdtype +from scipy._lib._array_api import ( + as_xparray, array_namespace, size, isdtype, atleast_nd +) from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist @@ -556,7 +558,8 @@ def _krandinit(data, k, rng, xp): sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1.)) x = xp.matmul(x, sVh) else: - cov = xp.atleast_2d(xp.cov(data.T)) + # TODO ARRAY_API cov not supported + cov = atleast_nd(xp.cov(data.T), ndim=2, xp=xp) # k rows, d cols (one row = one obs) # Generate k sample of a random variable ~ Gaussian(mu, cov) From 050a96e93b4361ad6b333eb3c4090cc267511dea Mon Sep 17 00:00:00 
2001 From: Pamphile Roy Date: Fri, 16 Jun 2023 14:41:32 +0200 Subject: [PATCH 70/87] TST: fix astype --- scipy/cluster/tests/test_hierarchy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index bed7679224ef..5b140400d7ef 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -1122,9 +1122,9 @@ def calculate_maximum_distances(Z, xp): left = Z[i, 0] right = Z[i, 1] if left >= n: - q[0] = B[xp.astype(left, xp.int64) - n] + q[0] = B[xp.asarray(left, dtype=xp.int64) - n] if right >= n: - q[1] = B[xp.astype(right, xp.int64) - n] + q[1] = B[xp.asarray(right, dtype=xp.int64) - n] q[2] = Z[i, 2] B[i] = xp.max(q) return B From f2dfb3c85d4cb69e03acfaef6229949f823247ac Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 16 Jun 2023 15:26:43 +0200 Subject: [PATCH 71/87] MAINT: fix mypy --- scipy/_lib/_array_api.py | 6 +++--- scipy/conftest.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index fcee233c1287..da73008f799b 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -10,15 +10,15 @@ import numpy as np # probably want to vendor it (submodule) -import array_api_compat +import array_api_compat # type: ignore[import] from array_api_compat import size -import array_api_compat.numpy +import array_api_compat.numpy # type: ignore[import] __all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace', 'isdtype'] # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn -array_api_dispatch = os.environ.get("array_api_dispatch", False) +array_api_dispatch = os.environ.get("array_api_dispatch", "") SCIPY_ARRAY_API = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) SCIPY_DEVICE = os.environ.get("SCIPY_DEVICE", "cpu") diff --git a/scipy/conftest.py b/scipy/conftest.py index 11a410612428..9d7887be2170 100644 --- a/scipy/conftest.py +++ 
b/scipy/conftest.py @@ -102,12 +102,12 @@ def check_fpu_mode(request): # Array API backend handling xp_available_backends = {'numpy': np} -if SCIPY_ARRAY_API: +if SCIPY_ARRAY_API != "": # fill the dict of backends with available libraries xp_available_backends.update({'numpy.array_api': numpy.array_api}) try: - import torch + import torch # type: ignore[import] xp_available_backends.update({'pytorch': torch}) # can use `mps` or `cpu` torch.set_default_device(SCIPY_DEVICE) @@ -115,7 +115,7 @@ def check_fpu_mode(request): pass try: - import cupy + import cupy # type: ignore[import] xp_available_backends.update({'cupy': cupy}) except ImportError: pass From 4787f5028a5d869e68d502ee4c7f32eac1292138 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Fri, 16 Jun 2023 15:37:01 +0200 Subject: [PATCH 72/87] MAINT: add array-api-compat in the deps for now. [skip cirrus] [skip circle] --- environment.yml | 1 + pyproject.toml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/environment.yml b/environment.yml index 6eb85bbcc683..0aa13c7a2eff 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - meson-python - ninja - numpy + - array-api-compat - openblas - pkg-config # note: not available on Windows - libblas=*=*openblas # helps avoid pulling in MKL diff --git a/pyproject.toml b/pyproject.toml index 940cb5760b06..cba19a5ad78b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,9 @@ requires = [ # doesn't list it as a runtime requirement (at least in 0.5.0) "wheel", + # Array API compatibility + "array-api-compat", + # NumPy dependencies - to update these, sync from # https://github.com/scipy/oldest-supported-numpy/, and then # update minimum version to match our install_requires min version From a38cd2ee7838fea0f4cd2d3f17c6e24c593b2c89 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 20 Jun 2023 22:11:41 +0200 Subject: [PATCH 73/87] MAINT: remove custom isdtype in favour of xp.isdtype --- scipy/_lib/_array_api.py | 49 
+------------------------------------- scipy/cluster/hierarchy.py | 8 +++---- scipy/cluster/vq.py | 4 ++-- 3 files changed, 7 insertions(+), 54 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index da73008f799b..fa5e2192f402 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -14,7 +14,7 @@ from array_api_compat import size import array_api_compat.numpy # type: ignore[import] -__all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace', 'isdtype'] +__all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn @@ -198,50 +198,3 @@ def to_numpy(array, xp): return array.get() return np.asarray(array) - - -def isdtype(dtype, kind, *, xp): - """Returns a boolean indicating whether a provided dtype is of type "kind". - - Included in the v2022.12 of the Array API spec. - https://data-apis.org/array-api/latest/API_specification/generated/array_api.isdtype.html - """ - if isinstance(kind, tuple): - return any(_isdtype_single(dtype, k, xp=xp) for k in kind) - else: - return _isdtype_single(dtype, kind, xp=xp) - - -def _isdtype_single(dtype, kind, *, xp): - if isinstance(kind, str): - if kind == "bool": - return dtype == xp.bool - elif kind == "signed integer": - return dtype in {xp.int8, xp.int16, xp.int32, xp.int64} - elif kind == "unsigned integer": - return dtype in {xp.uint8, xp.uint16, xp.uint32, xp.uint64} - elif kind == "integral": - return any( - _isdtype_single(dtype, k, xp=xp) - for k in ("signed integer", "unsigned integer") - ) - elif kind == "real floating": - return dtype in {xp.float32, xp.float64} - elif kind == "complex floating": - # Some name spaces do not have complex, such as cupy.array_api - # and numpy.array_api - complex_dtypes = set() - if hasattr(xp, "complex64"): - complex_dtypes.add(xp.complex64) - if hasattr(xp, "complex128"): - complex_dtypes.add(xp.complex128) - return dtype in complex_dtypes - elif kind == "numeric": - return 
any( - _isdtype_single(dtype, k, xp=xp) - for k in ("integral", "real floating", "complex floating") - ) - else: - raise ValueError(f"Unrecognized data type kind: {kind!r}") - else: - return dtype == kind diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index a14d5b563a5d..3bd49d68908f 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -3446,7 +3446,7 @@ def _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, def _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, - i, labels, show_leaf_counts, xp): + i, labels, show_leaf_counts): # If the leaf id structure is not None and is a list then the caller # to dendrogram has indicated that cluster id's corresponding to the # leaf nodes should be recorded. @@ -3458,7 +3458,7 @@ def _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, ivl.append(leaf_label_func(int(i))) else: if show_leaf_counts: - ivl.append("(" + str(xp.astype(Z[i - n, 3], xp.int64)) + ")") + ivl.append("(" + str(np.asarray(Z[i - n, 3], dtype=np.int64)) + ")") else: ivl.append("") @@ -3539,7 +3539,7 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, d = Z[i - n, 2] _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, i, labels, - show_leaf_counts, xp) + show_leaf_counts) if contraction_marks is not None: _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) return (iv + 5.0, 10.0, 0.0, d) @@ -3552,7 +3552,7 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, d = Z[i - n, 2] _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, i, labels, - show_leaf_counts, xp) + show_leaf_counts) if contraction_marks is not None: _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) return (iv + 5.0, 10.0, 0.0, d) diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 54cc74aceb25..ea8664dd6880 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -68,7 +68,7 @@ import numpy as 
np from collections import deque from scipy._lib._array_api import ( - as_xparray, array_namespace, size, isdtype, atleast_nd + as_xparray, array_namespace, size, atleast_nd ) from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist @@ -209,7 +209,7 @@ def vq(obs, code_book, check_finite=True): c_obs = xp.astype(obs, ct, copy=False) c_code_book = xp.astype(code_book, ct, copy=False) - if isdtype(ct, kind='real floating', xp=xp): + if xp.isdtype(ct, kind='real floating'): c_obs = np.asarray(c_obs) c_code_book = np.asarray(c_code_book) result = _vq.vq(c_obs, c_code_book) From 2d12ad482eb82539b029715497c55f8bbf32aba6 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 20 Jun 2023 22:13:00 +0200 Subject: [PATCH 74/87] TST: move to_numpy helper to tests --- scipy/_lib/_array_api.py | 17 ----------------- scipy/_lib/tests/test_array_api.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index fa5e2192f402..e0f14f60937c 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -181,20 +181,3 @@ def atleast_nd(x, *, ndim, xp): x = xp.expand_dims(x, axis=0) x = atleast_nd(x, ndim=ndim, xp=xp) return x - - -def to_numpy(array, xp): - """Convert `array` into a NumPy ndarray on the CPU. - - ONLY FOR TESTING - """ - xp_name = xp.__name__ - - if xp_name in {"array_api_compat.torch", "torch"}: - return array.cpu().numpy() - elif xp_name == "cupy.array_api": - return array._array.get() - elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover - return array.get() - - return np.asarray(array) diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 84d77c3328ac..80c11dfc987e 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -16,6 +16,20 @@ ) +def to_numpy(array, xp): + """Convert `array` into a NumPy ndarray on the CPU. 
From sklearn.""" + xp_name = xp.__name__ + + if xp_name in {"array_api_compat.torch", "torch"}: + return array.cpu().numpy() + elif xp_name == "cupy.array_api": + return array._array.get() + elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover + return array.get() + + return np.asarray(array) + + def test_array_namespace(): x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) xp = array_namespace(x, y) From 7aea9d3cdda10ef181abe259cf5b8b4d681faf8f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 20 Jun 2023 22:14:58 +0200 Subject: [PATCH 75/87] Revert "MAINT: add array-api-compat in the deps for now." This reverts commit 4787f5028a5d869e68d502ee4c7f32eac1292138. --- .github/workflows/array_api.yml | 1 - environment.yml | 1 - pyproject.toml | 3 --- 3 files changed, 5 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 89c5771040d3..19c2ef9e3e6a 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -53,7 +53,6 @@ jobs: run: | python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch # Packages for Array API testing - python -m pip install array-api-compat python -m pip install torch<2.1 --index-url https://download.pytorch.org/whl/cpu - name: Prepare compiler cache diff --git a/environment.yml b/environment.yml index 0aa13c7a2eff..6eb85bbcc683 100644 --- a/environment.yml +++ b/environment.yml @@ -15,7 +15,6 @@ dependencies: - meson-python - ninja - numpy - - array-api-compat - openblas - pkg-config # note: not available on Windows - libblas=*=*openblas # helps avoid pulling in MKL diff --git a/pyproject.toml b/pyproject.toml index cba19a5ad78b..940cb5760b06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,9 +18,6 @@ requires = [ # doesn't list it as a runtime requirement (at least in 0.5.0) "wheel", - # Array API compatibility - "array-api-compat", - # NumPy dependencies - to 
update these, sync from # https://github.com/scipy/oldest-supported-numpy/, and then # update minimum version to match our install_requires min version From 0e2c5b1e0bd20c9d6b9be124d802b9c0cec2b6cd Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 20 Jun 2023 22:39:52 +0200 Subject: [PATCH 76/87] MAINT: add array-api-compat as a submodule --- .gitmodules | 3 +++ LICENSES_bundled.txt | 5 +++++ scipy/_lib/array_api_compat | 1 + scipy/_lib/meson.build | 3 +++ 4 files changed, 12 insertions(+) create mode 160000 scipy/_lib/array_api_compat diff --git a/.gitmodules b/.gitmodules index 0821bf7ade07..e24f0399dc20 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,3 +17,6 @@ path = scipy/_lib/boost_math url = https://github.com/boostorg/math.git shallow = true +[submodule "scipy/_lib/array_api_compat"] + path = scipy/_lib/array_api_compat + url = https://github.com/data-apis/array-api-compat.git diff --git a/LICENSES_bundled.txt b/LICENSES_bundled.txt index aacb2372d8a4..1e2cc7d84b0d 100644 --- a/LICENSES_bundled.txt +++ b/LICENSES_bundled.txt @@ -256,3 +256,8 @@ Files: scipy/stats/_rcont/[logfactorial.h,logfactorial.c] License 3-Clause BSD For details, see header inside scipy/stats/_rcont/logfactorial.h and scipy/stats/_rcont/logfactorial.c + +Name: array-api-compat +Files: scipy/_lib/array-api-compat/* +License: MIT + For details, see scipy/optimize/_highs/LICENCE diff --git a/scipy/_lib/array_api_compat b/scipy/_lib/array_api_compat new file mode 160000 index 000000000000..34e9d0ca7f18 --- /dev/null +++ b/scipy/_lib/array_api_compat @@ -0,0 +1 @@ +Subproject commit 34e9d0ca7f18a0256c131f1c473eb9b5e13af92c diff --git a/scipy/_lib/meson.build b/scipy/_lib/meson.build index 95551d1272b0..c2c919a4fd27 100644 --- a/scipy/_lib/meson.build +++ b/scipy/_lib/meson.build @@ -8,6 +8,9 @@ endif if not fs.exists('unuran/README.md') error('Missing the `unuran` submodule! 
Run `git submodule update --init` to fix this.') endif +if not fs.exists('array_api_compat/README.md') + error('Missing the `array_api_compat` submodule! Run `git submodule update --init` to fix this.') +endif _lib_pxd = [ fs.copyfile('__init__.py'), From 15286a646f7b1b5faac1103a7aa0794dc8e6ff39 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Tue, 20 Jun 2023 23:33:33 +0200 Subject: [PATCH 77/87] MAINT: add to meson in a "verbose" way --- scipy/_lib/meson.build | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/scipy/_lib/meson.build b/scipy/_lib/meson.build index c2c919a4fd27..aba5cb0a76b6 100644 --- a/scipy/_lib/meson.build +++ b/scipy/_lib/meson.build @@ -124,5 +124,32 @@ py3.install_sources( subdir: 'scipy/_lib' ) +python_sources = [ + 'array_api_compat/array_api_compat/__init__.py', + 'array_api_compat/array_api_compat/_internal.py', + 'array_api_compat/array_api_compat/common/__init__.py', + 'array_api_compat/array_api_compat/common/_aliases.py', + 'array_api_compat/array_api_compat/common/_helpers.py', + 'array_api_compat/array_api_compat/common/_linalg.py', + 'array_api_compat/array_api_compat/common/_typing.py', + 'array_api_compat/array_api_compat/cupy/__init__.py', + 'array_api_compat/array_api_compat/cupy/_aliases.py', + 'array_api_compat/array_api_compat/cupy/_typing.py', + 'array_api_compat/array_api_compat/cupy/linalg.py', + 'array_api_compat/array_api_compat/numpy/__init__.py', + 'array_api_compat/array_api_compat/numpy/_aliases.py', + 'array_api_compat/array_api_compat/numpy/_typing.py', + 'array_api_compat/array_api_compat/numpy/linalg.py', + 'array_api_compat/array_api_compat/torch/__init__.py', + 'array_api_compat/array_api_compat/torch/_aliases.py', + 'array_api_compat/array_api_compat/torch/linalg.py', +] + +py3.install_sources( + python_sources, + preserve_path: true, + subdir: 'scipy/_lib' +) + subdir('_uarray') subdir('tests') From 92814ac91cb15c2641953584c8a672a62e670556 Mon Sep 17 00:00:00 2001 From: Pamphile 
Roy Date: Wed, 21 Jun 2023 00:34:49 +0200 Subject: [PATCH 78/87] MAINT: adjust array_api_compat imports. [skip cirrus] [skip circle] --- scipy/_lib/_array_api.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index e0f14f60937c..a8d1144c9bbc 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -9,10 +9,9 @@ import os import numpy as np -# probably want to vendor it (submodule) -import array_api_compat # type: ignore[import] -from array_api_compat import size -import array_api_compat.numpy # type: ignore[import] +import scipy._lib.array_api_compat.array_api_compat as array_api_compat # type: ignore[import] +from scipy._lib.array_api_compat.array_api_compat import size +import scipy._lib.array_api_compat.array_api_compat.numpy as array_api_compat_numpy # type: ignore[import] __all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] @@ -85,7 +84,7 @@ def array_namespace(*arrays): """ if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: # here we could wrap the namespace if needed - return array_api_compat.numpy + return array_api_compat_numpy arrays = [array for array in arrays if array is not None] From b7455f2b7af0cd4dc29f4422b817a47a564614b5 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 21 Jun 2023 01:03:44 +0200 Subject: [PATCH 79/87] MAINT: fix meson for array_api_compat tests. 
[skip cirrus] [skip circle] --- scipy/_lib/meson.build | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scipy/_lib/meson.build b/scipy/_lib/meson.build index aba5cb0a76b6..4cabbc3e194b 100644 --- a/scipy/_lib/meson.build +++ b/scipy/_lib/meson.build @@ -143,6 +143,10 @@ python_sources = [ 'array_api_compat/array_api_compat/torch/__init__.py', 'array_api_compat/array_api_compat/torch/_aliases.py', 'array_api_compat/array_api_compat/torch/linalg.py', + 'array_api_compat/tests/test_common.py', + 'array_api_compat/tests/test_isdtype.py', + 'array_api_compat/tests/test_vendoring.py', + 'array_api_compat/tests/test_array_namespace.py', ] py3.install_sources( From 5744618d875f3f561f5277709faeff56ebf3678d Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 21 Jun 2023 17:00:17 +0200 Subject: [PATCH 80/87] TST: fix import and collection --- pytest.ini | 2 +- scipy/_lib/tests/test_array_api.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pytest.ini b/pytest.ini index d390783c6d94..143d2863c6e0 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = -l +addopts = -l --ignore=scipy/_lib/array_api_compat junit_family=xunit2 filterwarnings = diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 80c11dfc987e..5cbfab0b8338 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -5,7 +5,6 @@ from scipy.conftest import array_api_compatible from scipy._lib._array_api import ( _GLOBAL_CONFIG, array_namespace, as_xparray, as_xparray_namespace, - to_numpy ) From 5033b54c306de5aa03ce0e2744b53d426dfc1612 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 21 Jun 2023 17:07:02 +0200 Subject: [PATCH 81/87] MAINT: ignore MyPy errors in vendored array_api_compat. 
[skip ci] --- mypy.ini | 3 +++ scipy/_lib/_array_api.py | 12 +++++++----- scipy/conftest.py | 8 ++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/mypy.ini b/mypy.ini index 1638181823e7..3f4f4546761c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -656,3 +656,6 @@ ignore_errors = True [mypy-scipy._lib._uarray.*] ignore_errors = True + +[mypy-scipy._lib.array_api_compat.*] +ignore_errors = True diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index a8d1144c9bbc..3c906c0369e5 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -6,19 +6,21 @@ The SciPy use case of the Array API is described on the following page: https://data-apis.org/array-api/latest/use_cases.html#use-case-scipy """ +from __future__ import annotations + import os import numpy as np -import scipy._lib.array_api_compat.array_api_compat as array_api_compat # type: ignore[import] -from scipy._lib.array_api_compat.array_api_compat import size -import scipy._lib.array_api_compat.array_api_compat.numpy as array_api_compat_numpy # type: ignore[import] +import scipy._lib.array_api_compat.array_api_compat as array_api_compat +from scipy._lib.array_api_compat.array_api_compat import size # noqa +import scipy._lib.array_api_compat.array_api_compat.numpy as array_api_compat_numpy __all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] # SCIPY_ARRAY_API, array_api_dispatch is used by sklearn -array_api_dispatch = os.environ.get("array_api_dispatch", "") -SCIPY_ARRAY_API = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) +array_api_dispatch = os.environ.get("array_api_dispatch", False) +SCIPY_ARRAY_API: str | bool = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) SCIPY_DEVICE = os.environ.get("SCIPY_DEVICE", "cpu") _GLOBAL_CONFIG = { diff --git a/scipy/conftest.py b/scipy/conftest.py index 9d7887be2170..5805dbb1c8de 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -102,7 +102,7 @@ def check_fpu_mode(request): # Array API backend 
handling xp_available_backends = {'numpy': np} -if SCIPY_ARRAY_API != "": +if SCIPY_ARRAY_API and isinstance(SCIPY_ARRAY_API, str): # fill the dict of backends with available libraries xp_available_backends.update({'numpy.array_api': numpy.array_api}) @@ -122,16 +122,16 @@ def check_fpu_mode(request): # by default, use all available backends if SCIPY_ARRAY_API.lower() != "true": - SCIPY_ARRAY_API = json.loads(SCIPY_ARRAY_API) + SCIPY_ARRAY_API_ = json.loads(SCIPY_ARRAY_API) - if 'all' in SCIPY_ARRAY_API: + if 'all' in SCIPY_ARRAY_API_: pass # same as True else: # only select a subset of backend by filtering out the dict try: xp_available_backends = { backend: xp_available_backends[backend] - for backend in SCIPY_ARRAY_API + for backend in SCIPY_ARRAY_API_ } except KeyError: msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}" From 8bd49ae7eee8d7134c37863816ebd3062792ee9f Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Wed, 21 Jun 2023 20:47:10 +0200 Subject: [PATCH 82/87] TST: add skip_if_array_api_backend [skip circle] [skip cirrus] --- scipy/cluster/tests/test_hierarchy.py | 6 +++++- scipy/cluster/tests/test_vq.py | 8 +++++++- scipy/conftest.py | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index 5b140400d7ef..7dd47a8fec7a 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -48,7 +48,10 @@ from scipy.spatial.distance import pdist from scipy.cluster._hierarchy import Heap from scipy.conftest import ( - skip_if_array_api, array_api_compatible, skip_if_array_api_gpu + array_api_compatible, + skip_if_array_api, + skip_if_array_api_gpu, + skip_if_array_api_backend, ) from . 
import hierarchy_test_data @@ -1183,6 +1186,7 @@ def test_node_compare(xp): @skip_if_array_api_gpu @array_api_compatible +@skip_if_array_api_backend('numpy.array_api') def test_cut_tree(xp): np.random.seed(23) nobs = 50 diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 63bb72a9de81..28141a65f8a7 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -12,7 +12,10 @@ ClusterError, _krandinit) from scipy.cluster import _vq from scipy.conftest import ( - skip_if_array_api, skip_if_array_api_gpu, array_api_compatible + array_api_compatible, + skip_if_array_api, + skip_if_array_api_gpu, + skip_if_array_api_backend, ) from scipy.sparse._sputils import matrix from scipy._lib._array_api import SCIPY_ARRAY_API, as_xparray @@ -277,6 +280,7 @@ def test_kmeans2_rank1(self, xp): kmeans2(data1, code, iter=2)[0] @array_api_compatible + @skip_if_array_api_backend('numpy.array_api') def test_kmeans2_rank1_2(self, xp): data = xp.asarray(TESTDATA_2D) data1 = data[:, 0] @@ -292,6 +296,7 @@ def test_kmeans2_high_dim(self, xp): @skip_if_array_api_gpu @array_api_compatible + @skip_if_array_api_backend('numpy.array_api') def test_kmeans2_init(self, xp): np.random.seed(12345) data = xp.asarray(TESTDATA_2D) @@ -310,6 +315,7 @@ def test_kmeans2_init(self, xp): kmeans2(data[:, :1], k, minit='random') # special case (1-D) @array_api_compatible + @skip_if_array_api_backend('numpy.array_api') @pytest.mark.skipif(sys.platform == 'win32', reason='Fails with MemoryError in Wine.') def test_krandinit(self, xp): diff --git a/scipy/conftest.py b/scipy/conftest.py index 5805dbb1c8de..02e90970b882 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -151,3 +151,22 @@ def check_fpu_mode(request): SCIPY_ARRAY_API and SCIPY_DEVICE != 'cpu', reason="do not run with Array API on and not on CPU", ) + +def skip_if_array_api_backend(backend): + def wrapper(func): + reason = ( + f"do not run with Array API on and using the backend: {backend}" + ) 
+ # method gets there as a function so we cannot use inspect.ismethod + if '.' in func.__qualname__: + def wrapped(self, *args, xp, **kwargs): + if xp.__name__ == backend: + pytest.skip(reason=reason) + return func(self, *args, xp, **kwargs) + else: + def wrapped(*args, xp, **kwargs): + if xp.__name__ == backend: + pytest.skip(reason=reason) + return func(*args, xp, **kwargs) + return wrapped + return wrapper From 50d7ea4e5ae6e80c99782155d1a3cb6716d8eb11 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 22 Jun 2023 00:56:02 +0200 Subject: [PATCH 83/87] TST: remove some np.matrix tests [skip ci] --- scipy/cluster/tests/test_vq.py | 54 ++++++++++++++++------------------ scipy/conftest.py | 3 +- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 28141a65f8a7..3c852733540d 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -85,40 +85,38 @@ def test_whiten(self, xp): [4.51041982, 0.02640918], [4.38567074, 0.95120889], [2.32191480, 1.63195503]]) - arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] - for tp in arrays: - obs = tp([[0.98744510, 0.82766775], - [0.62093317, 0.19406729], - [0.87545741, 0.00735733], - [0.85124403, 0.26499712], - [0.45067590, 0.45464607]]) - if "cupy" in xp.__name__: - import cupy as cp - cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) - else: - assert_allclose(whiten(obs), desired, rtol=1e-5) + + obs = xp.asarray([[0.98744510, 0.82766775], + [0.62093317, 0.19406729], + [0.87545741, 0.00735733], + [0.85124403, 0.26499712], + [0.45067590, 0.45464607]]) + if "cupy" in xp.__name__: + import cupy as cp + cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) + else: + assert_allclose(whiten(obs), desired, rtol=1e-5) @array_api_compatible def test_whiten_zero_std(self, xp): desired = np.array([[0., 1.0, 2.86666544], [0., 1.0, 1.32460034], [0., 1.0, 3.74382172]]) - arrays = [xp.asarray] if 
SCIPY_ARRAY_API else [np.asarray, matrix] - for tp in arrays: - obs = tp([[0., 1., 0.74109533], - [0., 1., 0.34243798], - [0., 1., 0.96785929]]) - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - - if "cupy" in xp.__name__: - import cupy as cp - cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) - else: - assert_allclose(whiten(obs), desired, rtol=1e-5) - - assert_equal(len(w), 1) - assert_(issubclass(w[-1].category, RuntimeWarning)) + + obs = xp.asarray([[0., 1., 0.74109533], + [0., 1., 0.34243798], + [0., 1., 0.96785929]]) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + + if "cupy" in xp.__name__: + import cupy as cp + cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) + else: + assert_allclose(whiten(obs), desired, rtol=1e-5) + + assert_equal(len(w), 1) + assert_(issubclass(w[-1].category, RuntimeWarning)) @array_api_compatible def test_whiten_not_finite(self, xp): diff --git a/scipy/conftest.py b/scipy/conftest.py index 02e90970b882..7b424fb4a465 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -152,10 +152,11 @@ def check_fpu_mode(request): reason="do not run with Array API on and not on CPU", ) + def skip_if_array_api_backend(backend): def wrapper(func): reason = ( - f"do not run with Array API on and using the backend: {backend}" + f"do not run with Array API backend: {backend}" ) # method gets there as a function so we cannot use inspect.ismethod if '.' 
in func.__qualname__: From 84cfe656a8327e76c49e035e32df72b11fb5b383 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 22 Jun 2023 13:57:43 +0200 Subject: [PATCH 84/87] CI: separate step for installing torch [skip cirrus] [skip circle] --- .github/workflows/array_api.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 19c2ef9e3e6a..a6e8baff46f7 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -52,7 +52,9 @@ jobs: - name: Install Python packages run: | python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch - # Packages for Array API testing + + - name: Install PyTorch CPU + run: | python -m pip install torch<2.1 --index-url https://download.pytorch.org/whl/cpu - name: Prepare compiler cache From b9fe7221655d1c6453d62bf0e8339592241ec2e4 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 22 Jun 2023 14:11:55 +0200 Subject: [PATCH 85/87] CI: fix deps definition [skip cirrus] [skip circle] --- .github/workflows/array_api.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index a6e8baff46f7..df791e984a44 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -54,8 +54,8 @@ jobs: python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch - name: Install PyTorch CPU - run: | - python -m pip install torch<2.1 --index-url https://download.pytorch.org/whl/cpu + run: | + python -m pip install "torch<2.1" --index-url https://download.pytorch.org/whl/cpu - name: Prepare compiler cache id: prep-ccache From 98f8309b2a5b041aee9c179b3abda113394078a1 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 22 Jun 2023 14:40:43 +0200 Subject: [PATCH 
86/87] TST: fix string comparison of 'array_api_compat.numpy' due to submodule. [skip cirrus] [skip circle] --- scipy/_lib/tests/test_array_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index 5cbfab0b8338..c72634f8c8bd 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -32,11 +32,11 @@ def to_numpy(array, xp): def test_array_namespace(): x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) xp = array_namespace(x, y) - assert xp.__name__ == 'array_api_compat.numpy' + assert 'array_api_compat.numpy' in xp.__name__ _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False xp = array_namespace(x, y) - assert xp.__name__ == 'array_api_compat.numpy' + assert 'array_api_compat.numpy' in xp.__name__ _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True @@ -51,7 +51,7 @@ def test_asarray(xp): def test_as_xparray_namespace(): x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) x, y, xp_ = as_xparray_namespace(x, y) - assert xp_.__name__ == 'array_api_compat.numpy' + assert 'array_api_compat.numpy' in xp_.__name__ ref = np.array([0, 1, 2]) assert_equal(x, ref) assert_equal(y, ref) @@ -59,7 +59,7 @@ def test_as_xparray_namespace(): _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False x, y, xp_ = as_xparray_namespace(x, y) - assert xp_.__name__ == 'array_api_compat.numpy' + assert 'array_api_compat.numpy' in xp_.__name__ _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True From f0c2ca422123578e91c581041df9696e2c088979 Mon Sep 17 00:00:00 2001 From: Pamphile Roy Date: Thu, 22 Jun 2023 16:52:01 +0200 Subject: [PATCH 87/87] CI: ensure pytest config is used and fix some mypy. 
[skip cirrus] [skip circle] --- .github/workflows/linux_meson.yml | 4 ++-- .github/workflows/windows.yml | 4 ++-- scipy/_lib/setup.py | 2 ++ scipy/conftest.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux_meson.yml b/.github/workflows/linux_meson.yml index 5c18b0d9625f..e6f1e779478b 100644 --- a/.github/workflows/linux_meson.yml +++ b/.github/workflows/linux_meson.yml @@ -201,7 +201,7 @@ jobs: run: | cd doc python3-dbg -m pip install pytest pytest-xdist pytest-timeout mpmath gmpy2 threadpoolctl pooch - python3-dbg -m pytest --pyargs scipy -n2 --durations=10 -m "not slow" + python3-dbg -m pytest -c pytest.ini --pyargs scipy -n2 --durations=10 -m "not slow" ################################################################################# gcc8: @@ -248,7 +248,7 @@ jobs: # can't be in source directory pushd $RUNNER_TEMP export PYTHONOPTIMIZE=2 - python -m pytest --pyargs scipy -n2 --durations=10 + python -m pytest -c pytest.ini --pyargs scipy -n2 --durations=10 popd ################################################################################# diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 5b89bda565c1..0ead6cd86f2c 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -164,7 +164,7 @@ jobs: run: | cd $RUNNER_TEMP # run full test suite - pytest --pyargs scipy + pytest -c pytest.ini --pyargs scipy ############################################################################# @@ -225,4 +225,4 @@ jobs: - name: Test run: | cd $RUNNER_TEMP - pytest --pyargs scipy -m "not slow" + pytest -c pytest.ini --pyargs scipy -m "not slow" diff --git a/scipy/_lib/setup.py b/scipy/_lib/setup.py index 59cd3667b61f..5944f2b7169f 100644 --- a/scipy/_lib/setup.py +++ b/scipy/_lib/setup.py @@ -79,6 +79,8 @@ def get_messagestream_config(ext, build_dir): config.add_subpackage('_uarray') + config.add_subpackage('array_api_compat') + # ensure Boost was checked out and builds 
config.add_library( 'test_boost_build', diff --git a/scipy/conftest.py b/scipy/conftest.py index 7b424fb4a465..cc0e72dbc764 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -165,7 +165,7 @@ def wrapped(self, *args, xp, **kwargs): pytest.skip(reason=reason) return func(self, *args, xp, **kwargs) else: - def wrapped(*args, xp, **kwargs): + def wrapped(*args, xp, **kwargs): # type: ignore[misc] if xp.__name__ == backend: pytest.skip(reason=reason) return func(*args, xp, **kwargs)