diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml new file mode 100644 index 000000000000..df791e984a44 --- /dev/null +++ b/.github/workflows/array_api.yml @@ -0,0 +1,88 @@ +name: Array API + +on: + push: + branches: + - maintenance/** + pull_request: + branches: + - main + - maintenance/** + +permissions: + contents: read # to fetch code (actions/checkout) + +env: + CCACHE_DIR: "${{ github.workspace }}/.ccache" + INSTALLDIR: "build-install" + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + pytorch_cpu: + name: Linux PyTorch CPU + if: "github.repository == 'scipy/scipy' || github.repository == ''" + runs-on: ubuntu-22.04 + strategy: + matrix: + python-version: ['3.11'] + maintenance-branch: + - ${{ contains(github.ref, 'maintenance/') || contains(github.base_ref, 'maintenance/') }} + exclude: + - maintenance-branch: true + + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' # not using a path to also cache pytorch + + - name: Install Ubuntu dependencies + run: | + sudo apt-get update + sudo apt-get install -y libopenblas-dev libatlas-base-dev liblapack-dev gfortran libgmp-dev libmpfr-dev libsuitesparse-dev ccache libmpc-dev + + - name: Install Python packages + run: | + python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch + + - name: Install PyTorch CPU + run: | + python -m pip install "torch<2.1" --index-url https://download.pytorch.org/whl/cpu + + - name: Prepare compiler cache + id: prep-ccache + shell: bash + run: | + mkdir -p "${CCACHE_DIR}" + echo "dir=$CCACHE_DIR" >> $GITHUB_OUTPUT + NOW=$(date -u +"%F-%T") + echo "timestamp=${NOW}" >> $GITHUB_OUTPUT + + - name: Setup compiler cache + uses: actions/cache@v3 + id: cache-ccache + with: + path: ${{ steps.prep-ccache.outputs.dir }} + key: ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux-${{ steps.prep-ccache.outputs.timestamp }} + restore-keys: | + ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux- + + - name: Setup build and install scipy + run: | + python dev.py build + + - name: Test SciPy + run: | + export OMP_NUM_THREADS=2 + export SCIPY_USE_PROPACK=1 + # expand as new modules are added + python dev.py --no-build test --array-api-backend pytorch --array-api-backend numpy -s cluster -- --durations 10 --timeout=60 + python dev.py --no-build test --array-api-backend all --tests scipy/_lib/tests/test_array_api.py diff --git a/.github/workflows/linux_meson.yml b/.github/workflows/linux_meson.yml index 5c18b0d9625f..e6f1e779478b 100644 --- a/.github/workflows/linux_meson.yml +++ b/.github/workflows/linux_meson.yml @@ -201,7 +201,7 @@ jobs: run: | cd doc python3-dbg -m pip install pytest pytest-xdist pytest-timeout mpmath gmpy2 threadpoolctl pooch - python3-dbg -m pytest --pyargs scipy -n2 --durations=10 -m "not slow" + python3-dbg -m pytest -c pytest.ini --pyargs scipy -n2 --durations=10 -m "not slow" ################################################################################# gcc8: @@ -248,7 +248,7 @@ jobs: # can't be in source directory pushd $RUNNER_TEMP export PYTHONOPTIMIZE=2 - python -m pytest --pyargs scipy -n2 --durations=10 + python -m pytest -c pytest.ini --pyargs scipy -n2 --durations=10 popd 
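Aside: the new `array_api.yml` job above selects backends with repeated `--array-api-backend`/`-b` flags. The `dev.py` hunk later in this diff serializes those flags into the `SCIPY_ARRAY_API` environment variable as a JSON list, which `scipy._lib._array_api` reads back at import time. A minimal sketch of that hand-off (the standalone helper name is illustrative, not part of the diff):

```python
import json
import os

def set_array_api_backends(backends):
    # Mirrors the dev.py change below: repeated -b values become a JSON
    # list in SCIPY_ARRAY_API, which the test session later reads back.
    if len(backends) != 0:
        os.environ['SCIPY_ARRAY_API'] = json.dumps(list(backends))

set_array_api_backends(('pytorch', 'numpy'))
print(os.environ['SCIPY_ARRAY_API'])  # ["pytorch", "numpy"]
```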
################################################################################# diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 5b89bda565c1..0ead6cd86f2c 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -164,7 +164,7 @@ jobs: run: | cd $RUNNER_TEMP # run full test suite - pytest --pyargs scipy + pytest -c pytest.ini --pyargs scipy ############################################################################# @@ -225,4 +225,4 @@ jobs: - name: Test run: | cd $RUNNER_TEMP - pytest --pyargs scipy -m "not slow" + pytest -c pytest.ini --pyargs scipy -m "not slow" diff --git a/.gitmodules b/.gitmodules index 0821bf7ade07..e24f0399dc20 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,3 +17,6 @@ path = scipy/_lib/boost_math url = https://github.com/boostorg/math.git shallow = true +[submodule "scipy/_lib/array_api_compat"] + path = scipy/_lib/array_api_compat + url = https://github.com/data-apis/array-api-compat.git diff --git a/LICENSES_bundled.txt b/LICENSES_bundled.txt index aacb2372d8a4..1e2cc7d84b0d 100644 --- a/LICENSES_bundled.txt +++ b/LICENSES_bundled.txt @@ -256,3 +256,8 @@ Files: scipy/stats/_rcont/[logfactorial.h,logfactorial.c] License 3-Clause BSD For details, see header inside scipy/stats/_rcont/logfactorial.h and scipy/stats/_rcont/logfactorial.c + +Name: array-api-compat +Files: scipy/_lib/array_api_compat/* +License: MIT + For details, see scipy/_lib/array_api_compat/LICENSE diff --git a/benchmarks/benchmarks/cluster.py b/benchmarks/benchmarks/cluster.py index 1d33608c1866..a7dc73564aff 100644 --- a/benchmarks/benchmarks/cluster.py +++ b/benchmarks/benchmarks/cluster.py @@ -50,7 +50,7 @@ def time_kmeans2(self, k, init): class VQ(Benchmark): - params = [[2, 10, 50], ['float32', 'float64', 'float128']] + params = [[2, 10, 50], ['float32', 'float64']] param_names = ['k', 'dtype'] def __init__(self): diff --git a/dev.py b/dev.py index 099b9dc36d2d..e93c0188d3c2 100644 --- a/dev.py +++ b/dev.py @@ -685,6 +685,7 @@ class Test(Task): $ python dev.py test -t scipy.optimize.tests.test_minimize_constrained $ python dev.py test -s cluster -m full --durations 20 $ python dev.py test -s stats -- --tb=line # `--` passes next args to pytest + $ python dev.py test -b numpy -b pytorch -s cluster ``` """ # noqa: E501 ctx = CONTEXT @@ -716,6 +717,13 @@ class Test(Task): ['--parallel', '-j'], default=1, metavar='N_JOBS', help="Number of parallel jobs for testing" ) + array_api_backend = Option( + ['--array-api-backend', '-b'], default=None, metavar='ARRAY_BACKEND', + multiple=True, + help=( + "Array API backend ('all', 'numpy', 'pytorch', 'numpy.array_api')."
+ ) + ) # Argument can't have `help=`; used to consume all of `-- arg1 arg2 arg3` pytest_args = Argument( ['pytest_args'], nargs=-1, metavar='PYTEST-ARGS', required=False ) @@ -756,6 +764,9 @@ def scipy_tests(cls, args, pytest_args): else: tests = None + if len(args.array_api_backend) != 0: + os.environ['SCIPY_ARRAY_API'] = json.dumps(list(args.array_api_backend)) + runner, version, mod_path = get_test_runner(PROJECT_MODULE) # FIXME: changing CWD is not a good practice with working_dir(dirs.site): diff --git a/mypy.ini b/mypy.ini index 1638181823e7..3f4f4546761c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -656,3 +656,6 @@ ignore_errors = True [mypy-scipy._lib._uarray.*] ignore_errors = True + +[mypy-scipy._lib.array_api_compat.*] +ignore_errors = True diff --git a/pytest.ini b/pytest.ini index 04021a5293d4..143d2863c6e0 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = -l +addopts = -l --ignore=scipy/_lib/array_api_compat junit_family=xunit2 filterwarnings = @@ -14,3 +14,4 @@ filterwarnings = ignore:.*The distutils.* is deprecated.*:DeprecationWarning ignore:\s*.*numpy.distutils.*:DeprecationWarning ignore:.*The --rsyncdir command line argument.*:DeprecationWarning + ignore:.*The numpy.array_api submodule is still experimental.*:UserWarning diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py new file mode 100644 index 000000000000..3c906c0369e5 --- /dev/null +++ b/scipy/_lib/_array_api.py @@ -0,0 +1,185 @@ +"""Utility functions to use Python Array API compatible libraries. + +For the context about the Array API see: +https://data-apis.org/array-api/latest/purpose_and_scope.html + +The SciPy use case of the Array API is described on the following page: +https://data-apis.org/array-api/latest/use_cases.html#use-case-scipy +""" +from __future__ import annotations + +import os + +import numpy as np +import scipy._lib.array_api_compat.array_api_compat as array_api_compat +from scipy._lib.array_api_compat.array_api_compat import size # noqa +import scipy._lib.array_api_compat.array_api_compat.numpy as array_api_compat_numpy + +__all__ = ['array_namespace', 'as_xparray', 'as_xparray_namespace'] + + +# SCIPY_ARRAY_API is the main switch; array_api_dispatch (sklearn's name for +# the same opt-in) is honored as a fallback +array_api_dispatch = os.environ.get("array_api_dispatch", False) +SCIPY_ARRAY_API: str | bool = os.environ.get("SCIPY_ARRAY_API", array_api_dispatch) +SCIPY_DEVICE = os.environ.get("SCIPY_DEVICE", "cpu") + +_GLOBAL_CONFIG = { + "SCIPY_ARRAY_API": SCIPY_ARRAY_API, + "SCIPY_DEVICE": SCIPY_DEVICE, +} + + +def compliance_scipy(*arrays): + """Raise exceptions on known-bad subclasses. + + The following subclasses are not supported and raise an error: + - `np.ma.MaskedArray` + - `numpy.matrix` + - Any array-like which is not Array API compatible + """ + for array in arrays: + if isinstance(array, np.ma.MaskedArray): + raise TypeError("'numpy.ma.MaskedArray' are not supported") + elif isinstance(array, np.matrix): + raise TypeError("'numpy.matrix' are not supported") + elif not array_api_compat.is_array_api_obj(array): + raise TypeError("Only support Array API compatible arrays") + elif array.dtype is np.dtype('O'): + raise TypeError('object arrays are not supported') + + +def _check_finite(array, xp): + """Check for NaNs or Infs.""" + msg = "array must not contain infs or NaNs" + try: + if not xp.all(xp.isfinite(array)): + raise ValueError(msg) + except TypeError: + raise ValueError(msg) + + +def array_namespace(*arrays): + """Get the array API compatible namespace for the arrays.
+ + Parameters + ---------- + *arrays : sequence of array_like + Arrays used to infer the common namespace. + + Returns + ------- + namespace : module + Common namespace. + + Notes + ----- + Thin wrapper around `array_api_compat.array_namespace`. + + 1. Check for the global switch: SCIPY_ARRAY_API. This can also be accessed + dynamically through ``_GLOBAL_CONFIG['SCIPY_ARRAY_API']``. + 2. `compliance_scipy` raises exceptions on known-bad subclasses. See + its definition for more details. + + When the global switch is False, it defaults to the `numpy` namespace. + In that case, there is no compliance check. This is a convenience to + ease adoption. Otherwise, arrays must comply with the new rules. + """ + if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: + # here we could wrap the namespace if needed + return array_api_compat_numpy + + arrays = [array for array in arrays if array is not None] + + compliance_scipy(*arrays) + + return array_api_compat.array_namespace(*arrays) + + +def as_xparray( + array, dtype=None, order=None, copy=None, *, xp=None, check_finite=True +): + """Drop-in replacement for `np.asarray`. + + The memory layout parameter `order` is not exposed in the Array API standard. + `order` is only enforced if the input array implementation + is NumPy based, otherwise it is silently ignored. + + The purpose of this helper is to make it possible to share code for data + container validation, without memory copies, for both NumPy and Array API + inputs. + """ + if xp is None: + xp = array_namespace(array) + if xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"}: + # Use NumPy API to support order + if copy is True: + array = np.array(array, order=order, dtype=dtype) + else: + array = np.asarray(array, order=order, dtype=dtype) + + # At this point array is a NumPy ndarray. We convert it to an array + # container that is consistent with the input's namespace. + array = xp.asarray(array) + else: + array = xp.asarray(array, dtype=dtype, copy=copy) + + if check_finite: + _check_finite(array, xp) + + return array + + +def as_xparray_namespace(*arrays): + """Validate and convert arrays to a common namespace. + + Parameters + ---------- + *arrays : sequence of array_like + Arrays to validate and convert. + + Returns + ------- + *arrays : sequence of array_like + Arrays validated and converted to the common namespace. + namespace : module + Common namespace. + + Notes + ----- + This function is meant to be called from each public function in a SciPy + submodule; it does the following: + + 1. Check for the global switch: SCIPY_ARRAY_API. This can also be accessed + dynamically through ``_GLOBAL_CONFIG['SCIPY_ARRAY_API']``. + 2. `compliance_scipy` raises exceptions on known-bad subclasses. See + its definition for more details. + 3. Determine the namespace, without doing any coercion of array(-like) + inputs. + 4. Call `xp.asarray` on all arrays.
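A minimal standalone sketch of how `array_namespace` and `as_xparray` (defined above) compose, assuming `SCIPY_ARRAY_API` is unset so the NumPy fallback path is taken and no compliance check runs:

```python
import numpy as np

from scipy._lib._array_api import array_namespace, as_xparray

x = np.arange(6, dtype=np.float32)
xp = array_namespace(x)          # NumPy-backed namespace when the switch is off
y = as_xparray(x, dtype=np.float64, xp=xp)
print(y.dtype)                   # float64

# check_finite=True (the default) rejects non-finite values
try:
    as_xparray(np.array([0.0, np.inf]), xp=xp)
except ValueError as exc:
    print(exc)                   # array must not contain infs or NaNs
```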
+ + Examples + -------- + >>> import numpy as np + >>> from scipy._lib._array_api import as_xparray_namespace + >>> x, y, xp = as_xparray_namespace(np.array([0, 1, 2]), np.array([0, 1, 2])) + >>> xp.__name__ + 'array_api_compat.numpy' + >>> x, y + (array([0, 1, 2]), array([0, 1, 2])) + + """ + arrays = list(arrays) + xp = array_namespace(*arrays) + + for i, array in enumerate(arrays): + arrays[i] = xp.asarray(array) + + return *arrays, xp + + +def atleast_nd(x, *, ndim, xp): + """Recursively expand `x` until it has at least `ndim` dimensions.""" + x = xp.asarray(x) + if x.ndim < ndim: + x = xp.expand_dims(x, axis=0) + x = atleast_nd(x, ndim=ndim, xp=xp) + return x diff --git a/scipy/_lib/array_api_compat b/scipy/_lib/array_api_compat new file mode 160000 index 000000000000..34e9d0ca7f18 --- /dev/null +++ b/scipy/_lib/array_api_compat @@ -0,0 +1 @@ +Subproject commit 34e9d0ca7f18a0256c131f1c473eb9b5e13af92c diff --git a/scipy/_lib/meson.build b/scipy/_lib/meson.build index 2d3f4fc7a4af..4cabbc3e194b 100644 --- a/scipy/_lib/meson.build +++ b/scipy/_lib/meson.build @@ -8,6 +8,9 @@ endif if not fs.exists('unuran/README.md') error('Missing the `unuran` submodule! Run `git submodule update --init` to fix this.') endif +if not fs.exists('array_api_compat/README.md') + error('Missing the `array_api_compat` submodule! Run `git submodule update --init` to fix this.') +endif _lib_pxd = [ fs.copyfile('__init__.py'), @@ -98,6 +101,7 @@ py3.extension_module('messagestream', python_sources = [ '__init__.py', + '_array_api.py', '_bunch.py', '_ccallback.py', '_disjoint_set.py', @@ -120,5 +124,36 @@ py3.install_sources( subdir: 'scipy/_lib' ) +python_sources = [ + 'array_api_compat/array_api_compat/__init__.py', + 'array_api_compat/array_api_compat/_internal.py', + 'array_api_compat/array_api_compat/common/__init__.py', + 'array_api_compat/array_api_compat/common/_aliases.py', + 'array_api_compat/array_api_compat/common/_helpers.py', + 'array_api_compat/array_api_compat/common/_linalg.py', + 'array_api_compat/array_api_compat/common/_typing.py', + 'array_api_compat/array_api_compat/cupy/__init__.py', + 'array_api_compat/array_api_compat/cupy/_aliases.py', + 'array_api_compat/array_api_compat/cupy/_typing.py', + 'array_api_compat/array_api_compat/cupy/linalg.py', + 'array_api_compat/array_api_compat/numpy/__init__.py', + 'array_api_compat/array_api_compat/numpy/_aliases.py', + 'array_api_compat/array_api_compat/numpy/_typing.py', + 'array_api_compat/array_api_compat/numpy/linalg.py', + 'array_api_compat/array_api_compat/torch/__init__.py', + 'array_api_compat/array_api_compat/torch/_aliases.py', + 'array_api_compat/array_api_compat/torch/linalg.py', + 'array_api_compat/tests/test_common.py', + 'array_api_compat/tests/test_isdtype.py', + 'array_api_compat/tests/test_vendoring.py', + 'array_api_compat/tests/test_array_namespace.py', +] + +py3.install_sources( + python_sources, + preserve_path: true, + subdir: 'scipy/_lib' +) + subdir('_uarray') subdir('tests') diff --git a/scipy/_lib/setup.py b/scipy/_lib/setup.py index 59cd3667b61f..5944f2b7169f 100644 --- a/scipy/_lib/setup.py +++ b/scipy/_lib/setup.py @@ -79,6 +79,8 @@ def get_messagestream_config(ext, build_dir): config.add_subpackage('_uarray') + config.add_subpackage('array_api_compat') + # ensure Boost was checked out and builds config.add_library( 'test_boost_build', diff --git a/scipy/_lib/tests/meson.build b/scipy/_lib/tests/meson.build index 13da47d3d412..197bb3f28ed4 100644 --- a/scipy/_lib/tests/meson.build +++ b/scipy/_lib/tests/meson.build @@ -12,6 +12,7 @@ python_sources = [ 'test_public_api.py',
'test_tmpdirs.py', 'test_warnings.py', + 'test_array_api.py', ] py3.install_sources( diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py new file mode 100644 index 000000000000..c72634f8c8bd --- /dev/null +++ b/scipy/_lib/tests/test_array_api.py @@ -0,0 +1,88 @@ +import numpy as np +from numpy.testing import assert_equal +import pytest + +from scipy.conftest import array_api_compatible +from scipy._lib._array_api import ( + _GLOBAL_CONFIG, array_namespace, as_xparray, as_xparray_namespace, +) + + +if not _GLOBAL_CONFIG["SCIPY_ARRAY_API"]: + pytest.skip( + "Array API test; set environment variable SCIPY_ARRAY_API=1 to run it", + allow_module_level=True + ) + + +def to_numpy(array, xp): + """Convert `array` into a NumPy ndarray on the CPU. From sklearn.""" + xp_name = xp.__name__ + + if xp_name in {"array_api_compat.torch", "torch"}: + return array.cpu().numpy() + elif xp_name == "cupy.array_api": + return array._array.get() + elif xp_name in {"array_api_compat.cupy", "cupy"}: # pragma: nocover + return array.get() + + return np.asarray(array) + + +def test_array_namespace(): + x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) + xp = array_namespace(x, y) + assert 'array_api_compat.numpy' in xp.__name__ + + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False + xp = array_namespace(x, y) + assert 'array_api_compat.numpy' in xp.__name__ + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True + + +@array_api_compatible +def test_asarray(xp): + x, y = as_xparray([0, 1, 2], xp=xp), as_xparray(np.arange(3), xp=xp) + ref = np.array([0, 1, 2]) + assert_equal(x, ref) + assert_equal(y, ref) + + +def test_as_xparray_namespace(): + x, y = np.array([0, 1, 2]), np.array([0, 1, 2]) + x, y, xp_ = as_xparray_namespace(x, y) + assert 'array_api_compat.numpy' in xp_.__name__ + ref = np.array([0, 1, 2]) + assert_equal(x, ref) + assert_equal(y, ref) + assert type(x) == type(y) + + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = False + x, y, xp_ = as_xparray_namespace(x, y) + assert 'array_api_compat.numpy' in xp_.__name__ + _GLOBAL_CONFIG["SCIPY_ARRAY_API"] = True + + +@array_api_compatible +def test_to_numpy(xp): + x = xp.asarray([0, 1, 2]) + x = to_numpy(x, xp=xp) + assert isinstance(x, np.ndarray) + + +@pytest.mark.filterwarnings("ignore: the matrix subclass") +def test_raises(): + msg = "'numpy.ma.MaskedArray' are not supported" + with pytest.raises(TypeError, match=msg): + array_namespace(np.ma.array(1), np.array(1)) + + msg = "'numpy.matrix' are not supported" + with pytest.raises(TypeError, match=msg): + array_namespace(np.array(1), np.matrix(1)) + + msg = "Only support Array API" + with pytest.raises(TypeError, match=msg): + array_namespace([0, 1, 2]) + + with pytest.raises(TypeError, match=msg): + array_namespace(1) diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index a4f718d52caa..3bd49d68908f 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,6 +134,7 @@ import numpy as np from . import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance +from scipy._lib._array_api import array_namespace, as_xparray from scipy._lib._disjoint_set import DisjointSet @@ -163,12 +164,7 @@ def _copy_array_if_base_present(a): """ Copy the array if its base points to a parent array.
""" - if a.base is not None: - return a.copy() - elif np.issubsctype(a, np.float32): - return np.array(a, dtype=np.double) - else: - return a + return a def _copy_arrays_if_base_present(T): @@ -181,28 +177,6 @@ def _copy_arrays_if_base_present(T): return [_copy_array_if_base_present(a) for a in T] -def _randdm(pnts): - """ - Generate a random distance matrix stored in condensed form. - - Parameters - ---------- - pnts : int - The number of points in the distance matrix. Has to be at least 2. - - Returns - ------- - D : ndarray - A ``pnts * (pnts - 1) / 2`` sized vector is returned. - """ - if pnts >= 2: - D = np.random.rand(pnts * (pnts - 1) / 2) - else: - raise ValueError("The number of points in the distance matrix " - "must be at least 2.") - return D - - def single(y): """ Perform single/min/nearest linkage on the condensed distance matrix ``y``. @@ -1042,7 +1016,8 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): if method not in _LINKAGE_METHODS: raise ValueError(f"Invalid method: {method}") - y = _convert_to_double(np.asarray(y, order='c')) + xp = array_namespace(y) + y = _convert_to_double(as_xparray(y, order='c', xp=xp), xp=xp) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -1052,29 +1027,33 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): raise ValueError("Method '{}' requires the distance metric " "to be Euclidean".format(method)) if y.shape[0] == y.shape[1] and np.allclose(np.diag(y), 0): - if np.all(y >= 0) and np.allclose(y, y.T): + if xp.all(y >= 0) and np.allclose(y, y.T): _warning('The symmetric non-negative hollow observation ' 'matrix looks suspiciously like an uncondensed ' 'distance matrix') y = distance.pdist(y, metric) + y = xp.asarray(y) else: raise ValueError("`y` must be 1 or 2 dimensional.") - if not np.all(np.isfinite(y)): + if not xp.all(xp.isfinite(y)): raise ValueError("The condensed distance matrix must contain only " "finite values.") n = int(distance.num_obs_y(y)) method_code = _LINKAGE_METHODS[method] + y = np.asarray(y) if method == 'single': result = _hierarchy.mst_single_linkage(y, n) elif method in ['complete', 'average', 'weighted', 'ward']: result = _hierarchy.nn_chain(y, n, method_code) else: result = _hierarchy.fast_linkage(y, n, method_code) + result = xp.asarray(result) if optimal_ordering: + y = xp.asarray(y) return optimal_leaf_ordering(result, y) else: return result @@ -1359,6 +1338,7 @@ def cut_tree(Z, n_clusters=None, height=None): [4, 7]]) # random """ + xp = array_namespace(Z) nobs = num_obs_linkage(Z) nodes = _order_cluster_tree(Z) @@ -1366,29 +1346,32 @@ def cut_tree(Z, n_clusters=None, height=None): raise ValueError("At least one of either height or n_clusters " "must be None") elif height is None and n_clusters is None: # return the full cut tree - cols_idx = np.arange(nobs) + cols_idx = xp.arange(nobs) elif height is not None: - heights = np.array([x.dist for x in nodes]) - cols_idx = np.searchsorted(heights, height) + height = xp.asarray(height) + heights = xp.asarray([x.dist for x in nodes]) + cols_idx = xp.searchsorted(heights, height) else: - cols_idx = nobs - np.searchsorted(np.arange(nobs), n_clusters) + n_clusters = xp.asarray(n_clusters) + cols_idx = nobs - xp.searchsorted(xp.arange(nobs), n_clusters) try: n_cols = len(cols_idx) except TypeError: # scalar n_cols = 1 - cols_idx = np.array([cols_idx]) + cols_idx = xp.asarray([cols_idx]) - groups = np.zeros((n_cols, nobs), dtype=int) - last_group = np.arange(nobs) + groups = xp.zeros((n_cols, nobs), 
dtype=xp.int64) + last_group = xp.arange(nobs) if 0 in cols_idx: groups[0] = last_group for i, node in enumerate(nodes): idx = node.pre_order() - this_group = last_group.copy() - this_group[idx] = last_group[idx].min() - this_group[this_group > last_group[idx].max()] -= 1 + this_group = as_xparray(last_group, copy=True, xp=xp) + # TODO ARRAY_API complex indexing not supported + this_group[idx] = xp.min(last_group[idx]) + this_group[this_group > xp.max(last_group[idx])] -= 1 if i + 1 in cols_idx: groups[np.nonzero(i + 1 == cols_idx)[0]] = this_group last_group = this_group @@ -1456,7 +1439,8 @@ def to_tree(Z, rd=False): 9 """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') # Number of original objects is equal to the number of rows plus 1. @@ -1471,9 +1455,11 @@ def to_tree(Z, rd=False): nd = None - for i, row in enumerate(Z): - fi = int(row[0]) - fj = int(row[1]) + for i in range(Z.shape[0]): + row = Z[i, :] + + fi = xp.astype(row[0], xp.int64) + fj = xp.astype(row[1], xp.int64) if fi > i + n: raise ValueError(('Corrupt matrix Z. Index to derivative cluster ' 'is used before it is formed. See row %d, ' @@ -1535,10 +1521,11 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): array([3, 0, 2, 5, 7, 4, 8, 6, 9, 1], dtype=int32) """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z, y) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') - y = _convert_to_double(np.asarray(y, order='c')) + y = _convert_to_double(as_xparray(y, order='c', xp=xp), xp=xp) if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') @@ -1550,29 +1537,30 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): 'matrix looks suspiciously like an uncondensed ' 'distance matrix') y = distance.pdist(y, metric) + y = xp.asarray(y) else: raise ValueError("`y` must be 1 or 2 dimensional.") - if not np.all(np.isfinite(y)): + if not xp.all(xp.isfinite(y)): raise ValueError("The condensed distance matrix must contain only " "finite values.") - return _optimal_leaf_ordering.optimal_leaf_ordering(Z, y) + Z = np.asarray(Z) + y = np.asarray(y) + return xp.asarray(_optimal_leaf_ordering.optimal_leaf_ordering(Z, y)) -def _convert_to_bool(X): - if X.dtype != bool: - X = X.astype(bool) - if not X.flags.contiguous: - X = X.copy() +def _convert_to_bool(X, xp): + if X.dtype != xp.bool: + X = xp.astype(X, xp.bool) + X = as_xparray(X, copy=True, xp=xp) return X -def _convert_to_double(X): - if X.dtype != np.double: - X = X.astype(np.double) - if not X.flags.contiguous: - X = X.copy() +def _convert_to_double(X, xp): + if X.dtype != xp.float64: + X = xp.astype(X, xp.float64) + X = as_xparray(X, copy=True, xp=xp) return X @@ -1683,7 +1671,8 @@ def cophenet(Z, Y=None): corners - thus, the distance between these clusters will be larger. """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z, Y) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') Zs = Z.shape n = Zs[0] + 1 zz = np.zeros((n * (n-1)) // 2, dtype=np.double) # Since the C code does not support striding using strides. # The dimensions are used instead.
- Z = _convert_to_double(Z) + Z = _convert_to_double(Z, xp=xp) + Z = np.asarray(Z) _hierarchy.cophenetic_distances(Z, zz, int(n)) + zz = xp.asarray(zz) if Y is None: return zz - Y = np.asarray(Y, order='c') + Y = as_xparray(Y, order='c', xp=xp) distance.is_valid_y(Y, throw=True, name='Y') - z = zz.mean() - y = Y.mean() + z = xp.mean(zz) + y = xp.mean(Y) Yy = Y - y Zz = zz - z numerator = (Yy * Zz) denomA = Yy**2 denomB = Zz**2 - c = numerator.sum() / np.sqrt(denomA.sum() * denomB.sum()) + c = xp.sum(numerator) / xp.sqrt(xp.sum(denomA) * xp.sum(denomB)) return (c, zz) @@ -1766,7 +1757,8 @@ def inconsistent(Z, d=2): [ 6.44583366, 6.76770586, 3. , 1.12682288]]) """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) Zs = Z.shape is_valid_linkage(Z, throw=True, name='Z') @@ -1781,7 +1773,9 @@ def inconsistent(Z, d=2): n = Zs[0] + 1 R = np.zeros((n - 1, 4), dtype=np.double) + Z = np.asarray(Z) _hierarchy.inconsistent(Z, R, int(n), int(d)) + R = xp.asarray(R) return R @@ -1855,28 +1849,31 @@ def from_mlab_linkage(Z): (MATLAB format uses 1-indexing, whereas SciPy uses 0-indexing). """ - Z = np.asarray(Z, dtype=np.double, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, dtype=xp.float64, order='c', xp=xp) Zs = Z.shape # If it's empty, return it. if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): - return Z.copy() + return as_xparray(Z, copy=True, xp=xp) if len(Zs) != 2: raise ValueError("The linkage array must be rectangular.") # If it contains no rows, return it. if Zs[0] == 0: - return Z.copy() + return as_xparray(Z, copy=True, xp=xp) - Zpart = Z.copy() - if Zpart[:, 0:2].min() != 1.0 and Zpart[:, 0:2].max() != 2 * Zs[0]: + Zpart = as_xparray(Z, copy=True, xp=xp) + if xp.min(Zpart[:, 0:2]) != 1.0 and xp.max(Zpart[:, 0:2]) != 2 * Zs[0]: raise ValueError('The format of the indices is not 1..N') Zpart[:, 0:2] -= 1.0 CS = np.zeros((Zs[0],), dtype=np.double) + Zpart = np.asarray(Zpart) _hierarchy.calculate_cluster_sizes(Zpart, CS, int(Zs[0]) + 1) - return np.hstack([Zpart, CS.reshape(Zs[0], 1)]) + res = np.hstack([Zpart, CS.reshape(Zs[0], 1)]) + return xp.asarray(res) def to_mlab_linkage(Z): @@ -1954,13 +1951,14 @@ def to_mlab_linkage(Z): the original linkage matrix has been dropped. """ - Z = np.asarray(Z, order='c', dtype=np.double) + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', dtype=xp.float64) Zs = Z.shape if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): - return Z.copy() + return as_xparray(Z, copy=True, xp=xp) is_valid_linkage(Z, throw=True, name='Z') - ZP = Z[:, 0:3].copy() + ZP = as_xparray(Z[:, 0:3], copy=True, xp=xp) ZP[:, 0:2] += 1.0 return ZP @@ -2044,11 +2042,12 @@ def is_monotonic(Z): increasing order. """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') # We expect the i'th value to be greater than its successor. - return (Z[1:, 2] >= Z[:-1, 2]).all() + return xp.all(Z[1:, 2] >= Z[:-1, 2]) def is_valid_im(R, warning=False, throw=False, name=None): @@ -2138,14 +2137,12 @@ def is_valid_im(R, warning=False, throw=False, name=None): False """ - R = np.asarray(R, order='c') + xp = array_namespace(R) + R = as_xparray(R, order='c', xp=xp) valid = True name_str = "%r " % name if name else '' try: - if type(R) != np.ndarray: - raise TypeError('Variable %spassed as inconsistency matrix is not ' - 'a numpy array.' 
% name_str) - if R.dtype != np.double: + if R.dtype != xp.float64: raise TypeError('Inconsistency matrix %smust contain doubles ' '(double).' % name_str) if len(R.shape) != 2: @@ -2157,13 +2154,13 @@ def is_valid_im(R, warning=False, throw=False, name=None): if R.shape[0] < 1: raise ValueError('Inconsistency matrix %smust have at least one ' 'row.' % name_str) - if (R[:, 0] < 0).any(): + if xp.any(R[:, 0] < 0): raise ValueError('Inconsistency matrix %scontains negative link ' 'height means.' % name_str) - if (R[:, 1] < 0).any(): + if xp.any(R[:, 1] < 0): raise ValueError('Inconsistency matrix %scontains negative link ' 'height standard deviations.' % name_str) - if (R[:, 2] < 0).any(): + if xp.any(R[:, 2] < 0): raise ValueError('Inconsistency matrix %scontains negative link ' 'counts.' % name_str) except Exception as e: @@ -2257,14 +2254,12 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): False """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) valid = True name_str = "%r " % name if name else '' try: - if type(Z) != np.ndarray: - raise TypeError('Passed linkage argument %sis not a valid array.' % - name_str) - if Z.dtype != np.double: + if Z.dtype != xp.float64: raise TypeError('Linkage matrix %smust contain doubles.' % name_str) if len(Z.shape) != 2: raise ValueError('Linkage matrix %smust have shape=2 (i.e. be ' @@ -2276,13 +2271,13 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): 'observations.') n = Z.shape[0] if n > 1: - if ((Z[:, 0] < 0).any() or (Z[:, 1] < 0).any()): + if (xp.any(Z[:, 0] < 0) or xp.any(Z[:, 1] < 0)): raise ValueError('Linkage %scontains negative indices.' % name_str) - if (Z[:, 2] < 0).any(): + if xp.any(Z[:, 2] < 0): raise ValueError('Linkage %scontains negative distances.' % name_str) - if (Z[:, 3] < 0).any(): + if xp.any(Z[:, 3] < 0): raise ValueError('Linkage %scontains negative counts.' % name_str) if _check_hierarchy_uses_cluster_before_formed(Z): @@ -2313,10 +2308,10 @@ def _check_hierarchy_uses_cluster_more_than_once(Z): n = Z.shape[0] + 1 chosen = set() for i in range(0, n - 1): - if (Z[i, 0] in chosen) or (Z[i, 1] in chosen) or Z[i, 0] == Z[i, 1]: + if (float(Z[i, 0]) in chosen) or (float(Z[i, 1]) in chosen) or Z[i, 0] == Z[i, 1]: return True - chosen.add(Z[i, 0]) - chosen.add(Z[i, 1]) + chosen.add(float(Z[i, 0])) + chosen.add(float(Z[i, 1])) return False @@ -2363,7 +2358,7 @@ def num_obs_linkage(Z): 12 """ - Z = np.asarray(Z, order='c') + Z = as_xparray(Z, order='c') is_valid_linkage(Z, throw=True, name='Z') return (Z.shape[0] + 1) @@ -2419,8 +2414,9 @@ def correspond(Z, Y): """ is_valid_linkage(Z, throw=True) distance.is_valid_y(Y, throw=True) - Z = np.asarray(Z, order='c') - Y = np.asarray(Y, order='c') + xp = array_namespace(Z, Y) + Z = as_xparray(Z, order='c', xp=xp) + Y = as_xparray(Y, order='c', xp=xp) return distance.num_obs_y(Y) == num_obs_linkage(Z) @@ -2575,9 +2571,12 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): all data points to be merged together - so a single cluster is returned. """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') + t = xp.asarray(t) + n = Z.shape[0] + 1 T = np.zeros((n,), dtype='i') @@ -2585,30 +2584,35 @@ def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): # The dimensions are used instead. 
[Z] = _copy_arrays_if_base_present([Z]) + Z = np.asarray(Z) + monocrit = np.asarray(monocrit) if criterion == 'inconsistent': if R is None: R = inconsistent(Z, depth) else: - R = np.asarray(R, order='c') + R = as_xparray(R, order='c', xp=xp) is_valid_im(R, throw=True, name='R') # Since the C code does not support striding using strides. # The dimensions are used instead. [R] = _copy_arrays_if_base_present([R]) + R = np.asarray(R) _hierarchy.cluster_in(Z, R, T, float(t), int(n)) elif criterion == 'distance': _hierarchy.cluster_dist(Z, T, float(t), int(n)) elif criterion == 'maxclust': - _hierarchy.cluster_maxclust_dist(Z, T, int(n), int(t)) + t_ = xp.astype(t, xp.int64) + _hierarchy.cluster_maxclust_dist(Z, T, int(n), t_) elif criterion == 'monocrit': [monocrit] = _copy_arrays_if_base_present([monocrit]) _hierarchy.cluster_monocrit(Z, monocrit, T, float(t), int(n)) elif criterion == 'maxclust_monocrit': [monocrit] = _copy_arrays_if_base_present([monocrit]) - _hierarchy.cluster_maxclust_monocrit(Z, monocrit, T, int(n), int(t)) + t_ = xp.astype(t, xp.int64) + _hierarchy.cluster_maxclust_monocrit(Z, monocrit, T, int(n), t_) else: raise ValueError('Invalid cluster formation criterion: %s' % str(criterion)) - return T + return xp.asarray(T) def fclusterdata(X, t, criterion='inconsistent', @@ -2694,18 +2698,20 @@ def fclusterdata(X, t, criterion='inconsistent', default settings) is four clusters with three data points each. """ - X = np.asarray(X, order='c', dtype=np.double) + xp = array_namespace(X) + X = as_xparray(X, order='c', dtype=xp.float64) - if type(X) != np.ndarray or len(X.shape) != 2: - raise TypeError('The observation matrix X must be an n by m numpy ' + if len(X.shape) != 2: + raise TypeError('The observation matrix X must be an n by m ' 'array.') Y = distance.pdist(X, metric=metric) + Y = xp.asarray(Y) Z = linkage(Y, method=method) if R is None: R = inconsistent(Z, d=depth) else: - R = np.asarray(R, order='c') + R = as_xparray(R, order='c', xp=xp) T = fcluster(Z, criterion=criterion, depth=depth, R=R, t=t) return T @@ -2758,13 +2764,15 @@ def leaves_list(Z): >>> plt.show() """ - Z = np.asarray(Z, order='c') + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 ML = np.zeros((n,), dtype='i') [Z] = _copy_arrays_if_base_present([Z]) + Z = np.asarray(Z) _hierarchy.prelist(Z, ML, int(n)) - return ML + return xp.asarray(ML) # Maps number of leaves to text size. @@ -3294,14 +3302,19 @@ def llf(id): # or results in a crossing, an exception will be thrown. Passing # None orders leaf nodes based on the order they appear in the # pre-order traversal. 
- Z = np.asarray(Z, order='c') + Z = as_xparray(Z, order='c') if orientation not in ["top", "left", "bottom", "right"]: raise ValueError("orientation must be one of 'top', 'left', " "'bottom', or 'right'") - if labels is not None and Z.shape[0] + 1 != len(labels): - raise ValueError("Dimensions of Z and labels must be consistent.") + if labels is not None: + try: + len_labels = len(labels) + except (TypeError, AttributeError): + len_labels = labels.shape[0] + if Z.shape[0] + 1 != len_labels: + raise ValueError("Dimensions of Z and labels must be consistent.") is_valid_linkage(Z, throw=True, name='Z') Zs = Z.shape @@ -3445,21 +3458,21 @@ def _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, ivl.append(leaf_label_func(int(i))) else: if show_leaf_counts: - ivl.append("(" + str(int(Z[i - n, 3])) + ")") + ivl.append("(" + str(np.asarray(Z[i - n, 3], dtype=np.int64)) + ")") else: ivl.append("") -def _append_contraction_marks(Z, iv, i, n, contraction_marks): - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 0]), n, contraction_marks) - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 1]), n, contraction_marks) +def _append_contraction_marks(Z, iv, i, n, contraction_marks, xp): + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 0], xp.int64), n, contraction_marks, xp) + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 1], xp.int64), n, contraction_marks, xp) -def _append_contraction_marks_sub(Z, iv, i, n, contraction_marks): +def _append_contraction_marks_sub(Z, iv, i, n, contraction_marks, xp): if i >= n: contraction_marks.append((iv, Z[i - n, 2])) - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 0]), n, contraction_marks) - _append_contraction_marks_sub(Z, iv, int(Z[i - n, 1]), n, contraction_marks) + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 0], xp.int64), n, contraction_marks, xp) + _append_contraction_marks_sub(Z, iv, xp.astype(Z[i - n, 1], xp.int64), n, contraction_marks, xp) def _dendrogram_calculate_info(Z, p, truncate_mode, @@ -3511,6 +3524,7 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, the target node. """ + xp = array_namespace(Z) if n == 0: raise ValueError("Invalid singleton cluster count n.") @@ -3527,7 +3541,7 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, leaf_label_func, i, labels, show_leaf_counts) if contraction_marks is not None: - _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks) + _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) return (iv + 5.0, 10.0, 0.0, d) elif i < n: _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, @@ -3540,7 +3554,7 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, leaf_label_func, i, labels, show_leaf_counts) if contraction_marks is not None: - _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks) + _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) return (iv + 5.0, 10.0, 0.0, d) elif i < n: _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, @@ -3558,8 +3572,8 @@ def _dendrogram_calculate_info(Z, p, truncate_mode, # !!! Otherwise, we don't have a leaf node, so work on plotting a # non-leaf node. 
# Actual indices of a and b - aa = int(Z[i - n, 0]) - ab = int(Z[i - n, 1]) + aa = xp.astype(Z[i - n, 0], xp.int64) + ab = xp.astype(Z[i - n, 1], xp.int64) if aa >= n: # The number of singletons below cluster a na = Z[aa - n, 3] @@ -3755,11 +3769,6 @@ def is_isomorphic(T1, T2): T1 = np.asarray(T1, order='c') T2 = np.asarray(T2, order='c') - if type(T1) != np.ndarray: - raise TypeError('T1 must be a numpy array.') - if type(T2) != np.ndarray: - raise TypeError('T2 must be a numpy array.') - T1S = T1.shape T2S = T2.shape @@ -3859,13 +3868,16 @@ def maxdists(Z): this case. """ - Z = np.asarray(Z, order='c', dtype=np.double) + xp = array_namespace(Z) + Z = as_xparray(Z, order='c', dtype=xp.float64, xp=xp) is_valid_linkage(Z, throw=True, name='Z') n = Z.shape[0] + 1 MD = np.zeros((n - 1,)) [Z] = _copy_arrays_if_base_present([Z]) + Z = np.asarray(Z) _hierarchy.get_max_dist_for_each_cluster(Z, MD, int(n)) + MD = xp.asarray(MD) return MD @@ -3944,8 +3956,9 @@ def maxinconsts(Z, R): 1.15470054]) """ - Z = np.asarray(Z, order='c') - R = np.asarray(R, order='c') + xp = array_namespace(Z, R) + Z = as_xparray(Z, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') @@ -3955,7 +3968,10 @@ def maxinconsts(Z, R): "have a different number of rows.") MI = np.zeros((n - 1,)) [Z, R] = _copy_arrays_if_base_present([Z, R]) + Z = np.asarray(Z) + R = np.asarray(R) _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MI, int(n), 3) + MI = xp.asarray(MI) return MI @@ -4036,8 +4052,9 @@ def maxRstat(Z, R, i): 1.15470054]) """ - Z = np.asarray(Z, order='c') - R = np.asarray(R, order='c') + xp = array_namespace(Z, R) + Z = as_xparray(Z, order='c', xp=xp) + R = as_xparray(R, order='c', xp=xp) is_valid_linkage(Z, throw=True, name='Z') is_valid_im(R, throw=True, name='R') if type(i) is not int: @@ -4052,7 +4069,10 @@ def maxRstat(Z, R, i): n = Z.shape[0] + 1 MR = np.zeros((n - 1,)) [Z, R] = _copy_arrays_if_base_present([Z, R]) + Z = np.asarray(Z) + R = np.asarray(R) _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MR, int(n), i) + MR = xp.asarray(MR) return MR @@ -4158,22 +4178,26 @@ def leaders(Z, T): array([1, 2, 3, 4], dtype=int32) """ - Z = np.asarray(Z, order='c') - T = np.asarray(T, order='c') - if type(T) != np.ndarray or T.dtype != 'i': - raise TypeError('T must be a one-dimensional numpy array of integers.') + xp = array_namespace(Z, T) + Z = as_xparray(Z, order='c', xp=xp) + T = as_xparray(T, order='c', xp=xp) + if T.dtype != xp.int32: + raise TypeError('T must be a one-dimensional array of integers.') is_valid_linkage(Z, throw=True, name='Z') - if len(T) != Z.shape[0] + 1: + if T.shape[0] != Z.shape[0] + 1: raise ValueError('Mismatch: len(T)!=Z.shape[0] + 1.') Cl = np.unique(T) kk = len(Cl) - L = np.zeros((kk,), dtype='i') - M = np.zeros((kk,), dtype='i') + L = np.zeros((kk,), dtype=np.int32) + M = np.zeros((kk,), dtype=np.int32) n = Z.shape[0] + 1 [Z, T] = _copy_arrays_if_base_present([Z, T]) + Z = np.asarray(Z) + T = np.asarray(T, dtype=np.int32) s = _hierarchy.leaders(Z, T, L, M, int(kk), int(n)) if s >= 0: raise ValueError(('T is not a valid assignment vector. 
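The `maxdists` hunk above ends with the conversion discipline that repeats throughout this file: validate in the caller's namespace, drop to NumPy right before the compiled `_hierarchy` kernel runs, then convert the result back. A minimal sketch of the pattern (the wrapper and its `kernel` parameter are illustrative, not part of this diff):

```python
import numpy as np

def _via_numpy_kernel(Z, kernel, xp):
    # Compiled kernels only understand NumPy arrays ...
    Z_np = np.asarray(Z)
    out = kernel(Z_np)        # e.g. one of the _hierarchy.* routines
    # ... so the result is converted back to the caller's namespace.
    return xp.asarray(out)
```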
Error found ' 'when examining linkage node %d (< 2n-1).') % s) + L, M = xp.asarray(L), xp.asarray(M) return (L, M) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index 6398b6781dc2..7dd47a8fec7a 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -47,6 +47,12 @@ _order_cluster_tree, _hierarchy, _LINKAGE_METHODS) from scipy.spatial.distance import pdist from scipy.cluster._hierarchy import Heap +from scipy.conftest import ( + array_api_compatible, + skip_if_array_api, + skip_if_array_api_gpu, + skip_if_array_api_backend, +) from . import hierarchy_test_data @@ -65,11 +71,12 @@ class TestLinkage: - def test_linkage_non_finite_elements_in_distance_matrix(self): + @array_api_compatible + def test_linkage_non_finite_elements_in_distance_matrix(self, xp): # Tests linkage(Y) where Y contains a non-finite element (e.g. NaN or Inf). # Exception expected. - y = np.zeros((6,)) - y[0] = np.nan + y = xp.zeros((6,)) + y[0] = xp.nan assert_raises(ValueError, linkage, y) def test_linkage_empty_distance_matrix(self): @@ -77,32 +84,35 @@ def test_linkage_empty_distance_matrix(self): y = np.zeros((0,)) assert_raises(ValueError, linkage, y) - def test_linkage_tdist(self): + @array_api_compatible + def test_linkage_tdist(self, xp): for method in ['single', 'complete', 'average', 'weighted']: - self.check_linkage_tdist(method) + self.check_linkage_tdist(method, xp) - def check_linkage_tdist(self, method): + def check_linkage_tdist(self, method, xp): # Tests linkage(Y, method) on the tdist data set. - Z = linkage(hierarchy_test_data.ytdist, method) + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), method) expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method) assert_allclose(Z, expectedZ, atol=1e-10) - def test_linkage_X(self): + @array_api_compatible + def test_linkage_X(self, xp): for method in ['centroid', 'median', 'ward']: - self.check_linkage_q(method) + self.check_linkage_q(method, xp) - def check_linkage_q(self, method): + def check_linkage_q(self, method, xp): # Tests linkage(Y, method) on the Q data set. 
- Z = linkage(hierarchy_test_data.X, method) + Z = linkage(xp.asarray(hierarchy_test_data.X), method) expectedZ = getattr(hierarchy_test_data, 'linkage_X_' + method) assert_allclose(Z, expectedZ, atol=1e-06) y = scipy.spatial.distance.pdist(hierarchy_test_data.X, metric="euclidean") - Z = linkage(y, method) + Z = linkage(xp.asarray(y), method) assert_allclose(Z, expectedZ, atol=1e-06) - def test_compare_with_trivial(self): + @array_api_compatible + def test_compare_with_trivial(self, xp): rng = np.random.RandomState(0) n = 20 X = rng.rand(n, 2) @@ -110,11 +120,12 @@ def test_compare_with_trivial(self): for method, code in _LINKAGE_METHODS.items(): Z_trivial = _hierarchy.linkage(d, n, code) - Z = linkage(d, method) + Z = linkage(xp.asarray(d), method) assert_allclose(Z_trivial, Z, rtol=1e-14, atol=1e-15) - def test_optimal_leaf_ordering(self): - Z = linkage(hierarchy_test_data.ytdist, optimal_ordering=True) + @array_api_compatible + def test_optimal_leaf_ordering(self, xp): + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), optimal_ordering=True) expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_single_olo') assert_allclose(Z, expectedZ, atol=1e-10) @@ -137,139 +148,155 @@ class TestLinkageTies: [2, 3, 2.44948974, 3]]), } - def test_linkage_ties(self): + @array_api_compatible + def test_linkage_ties(self, xp): for method in ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']: - self.check_linkage_ties(method) + self.check_linkage_ties(method, xp) - def check_linkage_ties(self, method): - X = np.array([[-1, -1], [0, 0], [1, 1]]) + def check_linkage_ties(self, method, xp): + X = xp.asarray([[-1, -1], [0, 0], [1, 1]]) Z = linkage(X, method=method) expectedZ = self._expectations[method] assert_allclose(Z, expectedZ, atol=1e-06) class TestInconsistent: - def test_inconsistent_tdist(self): + @array_api_compatible + def test_inconsistent_tdist(self, xp): for depth in hierarchy_test_data.inconsistent_ytdist: - self.check_inconsistent_tdist(depth) + self.check_inconsistent_tdist(depth, xp) - def check_inconsistent_tdist(self, depth): - Z = hierarchy_test_data.linkage_ytdist_single + def check_inconsistent_tdist(self, depth, xp): + Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single) assert_allclose(inconsistent(Z, depth), hierarchy_test_data.inconsistent_ytdist[depth]) class TestCopheneticDistance: - def test_linkage_cophenet_tdist_Z(self): + @array_api_compatible + def test_linkage_cophenet_tdist_Z(self, xp): # Tests cophenet(Z) on tdist data set. - expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, - 295, 138, 219, 295, 295]) - Z = hierarchy_test_data.linkage_ytdist_single + expectedM = xp.asarray([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, + 295, 138, 219, 295, 295]) + Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single) M = cophenet(Z) assert_allclose(M, expectedM, atol=1e-10) - def test_linkage_cophenet_tdist_Z_Y(self): + @array_api_compatible + def test_linkage_cophenet_tdist_Z_Y(self, xp): # Tests cophenet(Z, Y) on tdist data set. 
- Z = hierarchy_test_data.linkage_ytdist_single - (c, M) = cophenet(Z, hierarchy_test_data.ytdist) - expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, - 295, 138, 219, 295, 295]) + Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single) + (c, M) = cophenet(Z, xp.asarray(hierarchy_test_data.ytdist)) + expectedM = xp.asarray([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, + 295, 138, 219, 295, 295]) expectedc = 0.639931296433393415057366837573 assert_allclose(c, expectedc, atol=1e-10) assert_allclose(M, expectedM, atol=1e-10) class TestMLabLinkageConversion: + @skip_if_array_api def test_mlab_linkage_conversion_empty(self): # Tests from/to_mlab_linkage on empty linkage array. X = np.asarray([]) assert_equal(from_mlab_linkage([]), X) assert_equal(to_mlab_linkage([]), X) - def test_mlab_linkage_conversion_single_row(self): + @array_api_compatible + def test_mlab_linkage_conversion_single_row(self, xp): # Tests from/to_mlab_linkage on linkage array with single row. - Z = np.asarray([[0., 1., 3., 2.]]) - Zm = [[1, 2, 3]] - assert_equal(from_mlab_linkage(Zm), Z) - assert_equal(to_mlab_linkage(Z), Zm) + Z = xp.asarray([[0., 1., 3., 2.]]) + Zm = xp.asarray([[1, 2, 3]]) + assert_allclose(from_mlab_linkage(Zm), Z, rtol=1e-15) + assert_allclose(to_mlab_linkage(Z), Zm, rtol=1e-15) - def test_mlab_linkage_conversion_multiple_rows(self): + @array_api_compatible + def test_mlab_linkage_conversion_multiple_rows(self, xp): # Tests from/to_mlab_linkage on linkage array with multiple rows. - Zm = np.asarray([[3, 6, 138], [4, 5, 219], + Zm = xp.asarray([[3, 6, 138], [4, 5, 219], [1, 8, 255], [2, 9, 268], [7, 10, 295]]) - Z = np.array([[2., 5., 138., 2.], - [3., 4., 219., 2.], - [0., 7., 255., 3.], - [1., 8., 268., 4.], - [6., 9., 295., 6.]], - dtype=np.double) - assert_equal(from_mlab_linkage(Zm), Z) - assert_equal(to_mlab_linkage(Z), Zm) + Z = xp.asarray([[2., 5., 138., 2.], + [3., 4., 219., 2.], + [0., 7., 255., 3.], + [1., 8., 268., 4.], + [6., 9., 295., 6.]], + dtype=xp.float64) + assert_allclose(from_mlab_linkage(Zm), Z, rtol=1e-15) + assert_allclose(to_mlab_linkage(Z), Zm, rtol=1e-15) class TestFcluster: - def test_fclusterdata(self): + @array_api_compatible + def test_fclusterdata(self, xp): for t in hierarchy_test_data.fcluster_inconsistent: - self.check_fclusterdata(t, 'inconsistent') + self.check_fclusterdata(t, 'inconsistent', xp) for t in hierarchy_test_data.fcluster_distance: - self.check_fclusterdata(t, 'distance') + self.check_fclusterdata(t, 'distance', xp) for t in hierarchy_test_data.fcluster_maxclust: - self.check_fclusterdata(t, 'maxclust') + self.check_fclusterdata(t, 'maxclust', xp) - def check_fclusterdata(self, t, criterion): + def check_fclusterdata(self, t, criterion, xp): # Tests fclusterdata(X, criterion=criterion, t=t) on a random 3-cluster data set. 
- expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t] - X = hierarchy_test_data.Q_X + expectedT = xp.asarray(getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]) + X = xp.asarray(hierarchy_test_data.Q_X) + t = xp.asarray(t) T = fclusterdata(X, criterion=criterion, t=t) assert_(is_isomorphic(T, expectedT)) - def test_fcluster(self): + @array_api_compatible + def test_fcluster(self, xp): for t in hierarchy_test_data.fcluster_inconsistent: - self.check_fcluster(t, 'inconsistent') + self.check_fcluster(t, 'inconsistent', xp) for t in hierarchy_test_data.fcluster_distance: - self.check_fcluster(t, 'distance') + self.check_fcluster(t, 'distance', xp) for t in hierarchy_test_data.fcluster_maxclust: - self.check_fcluster(t, 'maxclust') + self.check_fcluster(t, 'maxclust', xp) - def check_fcluster(self, t, criterion): + def check_fcluster(self, t, criterion, xp): # Tests fcluster(Z, criterion=criterion, t=t) on a random 3-cluster data set. - expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t] - Z = single(hierarchy_test_data.Q_X) + expectedT = xp.asarray(getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]) + Z = single(xp.asarray(hierarchy_test_data.Q_X)) + t = xp.asarray(t) T = fcluster(Z, criterion=criterion, t=t) assert_(is_isomorphic(T, expectedT)) - def test_fcluster_monocrit(self): + @array_api_compatible + def test_fcluster_monocrit(self, xp): for t in hierarchy_test_data.fcluster_distance: - self.check_fcluster_monocrit(t) + self.check_fcluster_monocrit(t, xp) for t in hierarchy_test_data.fcluster_maxclust: - self.check_fcluster_maxclust_monocrit(t) + self.check_fcluster_maxclust_monocrit(t, xp) - def check_fcluster_monocrit(self, t): - expectedT = hierarchy_test_data.fcluster_distance[t] - Z = single(hierarchy_test_data.Q_X) + def check_fcluster_monocrit(self, t, xp): + expectedT = xp.asarray(hierarchy_test_data.fcluster_distance[t]) + Z = single(xp.asarray(hierarchy_test_data.Q_X)) T = fcluster(Z, t, criterion='monocrit', monocrit=maxdists(Z)) assert_(is_isomorphic(T, expectedT)) - def check_fcluster_maxclust_monocrit(self, t): - expectedT = hierarchy_test_data.fcluster_maxclust[t] - Z = single(hierarchy_test_data.Q_X) + def check_fcluster_maxclust_monocrit(self, t, xp): + expectedT = xp.asarray(hierarchy_test_data.fcluster_maxclust[t]) + Z = single(xp.asarray(hierarchy_test_data.Q_X)) T = fcluster(Z, t, criterion='maxclust_monocrit', monocrit=maxdists(Z)) assert_(is_isomorphic(T, expectedT)) class TestLeaders: - def test_leaders_single(self): + @array_api_compatible + def test_leaders_single(self, xp): # Tests leaders using a flat clustering generated by single linkage. 
X = hierarchy_test_data.Q_X Y = pdist(X) + Y = xp.asarray(Y) Z = linkage(Y) T = fcluster(Z, criterion='maxclust', t=3) - Lright = (np.array([53, 55, 56]), np.array([2, 3, 1])) + Lright = (xp.asarray([53, 55, 56]), xp.asarray([2, 3, 1])) + T = xp.asarray(T, dtype=xp.int32) L = leaders(Z, T) - assert_equal(L, Lright) + assert_allclose(np.concatenate(L), np.concatenate(Lright), rtol=1e-15) class TestIsIsomorphic: + @skip_if_array_api def test_is_isomorphic_1(self): # Tests is_isomorphic on test case #1 (one flat cluster, different labellings) a = [1, 1, 1] @@ -277,46 +304,53 @@ def test_is_isomorphic_1(self): assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_2(self): # Tests is_isomorphic on test case #2 (two flat clusters, different labelings) - a = [1, 7, 1] - b = [2, 3, 2] + a = np.asarray([1, 7, 1]) + b = np.asarray([2, 3, 2]) assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_3(self): # Tests is_isomorphic on test case #3 (no flat clusters) - a = [] - b = [] + a = np.asarray([]) + b = np.asarray([]) assert_(is_isomorphic(a, b)) + @skip_if_array_api def test_is_isomorphic_4A(self): # Tests is_isomorphic on test case #4A (3 flat clusters, different labelings, isomorphic) - a = [1, 2, 3] - b = [1, 3, 2] + a = np.asarray([1, 2, 3]) + b = np.asarray([1, 3, 2]) assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_4B(self): # Tests is_isomorphic on test case #4B (3 flat clusters, different labelings, nonisomorphic) - a = [1, 2, 3, 3] - b = [1, 3, 2, 3] + a = np.asarray([1, 2, 3, 3]) + b = np.asarray([1, 3, 2, 3]) assert_(is_isomorphic(a, b) is False) assert_(is_isomorphic(b, a) is False) + @skip_if_array_api def test_is_isomorphic_4C(self): # Tests is_isomorphic on test case #4C (3 flat clusters, different labelings, isomorphic) - a = [7, 2, 3] - b = [6, 3, 2] + a = np.asarray([7, 2, 3]) + b = np.asarray([6, 3, 2]) assert_(is_isomorphic(a, b)) assert_(is_isomorphic(b, a)) + @skip_if_array_api def test_is_isomorphic_5(self): # Tests is_isomorphic on test case #5 (1000 observations, 2/3/5 random # clusters, random permutation of the labeling). 
for nc in [2, 3, 5]: self.help_is_isomorphic_randperm(1000, nc) + @skip_if_array_api def test_is_isomorphic_6(self): # Tests is_isomorphic on test case #5A (1000 observations, 2/3/5 random # clusters, random permutation of the labeling, slightly @@ -324,9 +358,12 @@ def test_is_isomorphic_6(self): for nc in [2, 3, 5]: self.help_is_isomorphic_randperm(1000, nc, True, 5) + @skip_if_array_api def test_is_isomorphic_7(self): # Regression test for gh-6271 - assert_(not is_isomorphic([1, 2, 3], [1, 1, 1])) + a = np.asarray([1, 2, 3]) + b = np.asarray([1, 1, 1]) + assert_(not is_isomorphic(a, b)) def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0): for k in range(3): @@ -344,76 +381,90 @@ def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0): class TestIsValidLinkage: - def test_is_valid_linkage_various_size(self): + @array_api_compatible + def test_is_valid_linkage_various_size(self, xp): for nrow, ncol, valid in [(2, 5, False), (2, 3, False), (1, 4, True), (2, 4, True)]: - self.check_is_valid_linkage_various_size(nrow, ncol, valid) + self.check_is_valid_linkage_various_size(nrow, ncol, valid, xp) - def check_is_valid_linkage_various_size(self, nrow, ncol, valid): + def check_is_valid_linkage_various_size(self, nrow, ncol, valid, xp): # Tests is_valid_linkage(Z) with linkage matrics of various sizes - Z = np.asarray([[0, 1, 3.0, 2, 5], - [3, 2, 4.0, 3, 3]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2, 5], + [3, 2, 4.0, 3, 3]], dtype=xp.float64) Z = Z[:nrow, :ncol] assert_(is_valid_linkage(Z) == valid) if not valid: assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_int_type(self): + @array_api_compatible + def test_is_valid_linkage_int_type(self, xp): # Tests is_valid_linkage(Z) with integer type. - Z = np.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=int) + Z = xp.asarray([[0, 1, 3.0, 2], + [3, 2, 4.0, 3]], dtype=xp.int64) assert_(is_valid_linkage(Z) is False) assert_raises(TypeError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_empty(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_is_valid_linkage_empty(self, xp): # Tests is_valid_linkage(Z) with empty linkage. - Z = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_(is_valid_linkage(Z) is True) - def test_is_valid_linkage_4_and_up_neg_index_left(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_index_left(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative indices (left). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,0] = -2 assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up_neg_index_right(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_index_right(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative indices (right). 
for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,1] = -2 assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up_neg_dist(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_dist(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative distances. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,2] = -0.5 assert_(is_valid_linkage(Z) is False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) - def test_is_valid_linkage_4_and_up_neg_counts(self): + @array_api_compatible + def test_is_valid_linkage_4_and_up_neg_counts(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative counts. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) Z[i//2,3] = -2 assert_(is_valid_linkage(Z) is False) @@ -421,69 +472,81 @@ def test_is_valid_linkage_4_and_up_neg_counts(self): class TestIsValidInconsistent: - def test_is_valid_im_int_type(self): + @array_api_compatible + def test_is_valid_im_int_type(self, xp): # Tests is_valid_im(R) with integer type. - R = np.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=int) + R = xp.asarray([[0, 1, 3.0, 2], + [3, 2, 4.0, 3]], dtype=xp.int64) assert_(is_valid_im(R) is False) assert_raises(TypeError, is_valid_im, R, throw=True) - def test_is_valid_im_various_size(self): + @array_api_compatible + def test_is_valid_im_various_size(self, xp): for nrow, ncol, valid in [(2, 5, False), (2, 3, False), (1, 4, True), (2, 4, True)]: - self.check_is_valid_im_various_size(nrow, ncol, valid) + self.check_is_valid_im_various_size(nrow, ncol, valid, xp) - def check_is_valid_im_various_size(self, nrow, ncol, valid): + def check_is_valid_im_various_size(self, nrow, ncol, valid, xp): # Tests is_valid_im(R) with inconsistency matrices of various sizes - R = np.asarray([[0, 1, 3.0, 2, 5], - [3, 2, 4.0, 3, 3]], dtype=np.double) + R = xp.asarray([[0, 1, 3.0, 2, 5], + [3, 2, 4.0, 3, 3]], dtype=xp.float64) R = R[:nrow, :ncol] assert_(is_valid_im(R) == valid) if not valid: assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_empty(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_is_valid_im_empty(self, xp): # Tests is_valid_im(R) with empty inconsistency matrix. - R = np.zeros((0, 4), dtype=np.double) + R = xp.zeros((0, 4), dtype=xp.float64) assert_(is_valid_im(R) is False) assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_4_and_up(self): + @array_api_compatible + def test_is_valid_im_4_and_up(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) assert_(is_valid_im(R) is True) - def test_is_valid_im_4_and_up_neg_index_left(self): + @array_api_compatible + def test_is_valid_im_4_and_up_neg_index_left(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link height means.
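One recurring idiom in these loops deserves a note: random inputs are still drawn with NumPy and converted afterwards, because the array API standard defines no random-number interface. Schematically (the helper name is invented for illustration):

```python
import numpy as np

def condensed_distances(n, xp, seed=0):
    # Generate with NumPy (no RNG in the array API standard), then hand
    # the data to the backend under test, mirroring the repeated
    # `y = np.random.rand(...); y = xp.asarray(y)` lines above.
    rng = np.random.default_rng(seed)
    y = rng.random(n * (n - 1) // 2)   # a condensed vector has C(n, 2) entries
    return xp.asarray(y)
```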
for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) R[i//2,0] = -2.0 assert_(is_valid_im(R) is False) assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_4_and_up_neg_index_right(self): + @array_api_compatible + def test_is_valid_im_4_and_up_neg_index_right(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link height standard deviations. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) R[i//2,1] = -2.0 assert_(is_valid_im(R) is False) assert_raises(ValueError, is_valid_im, R, throw=True) - def test_is_valid_im_4_and_up_neg_dist(self): + @array_api_compatible + def test_is_valid_im_4_and_up_neg_dist(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link counts. for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) R[i//2,2] = -0.5 @@ -492,307 +555,365 @@ def test_is_valid_im_4_and_up_neg_dist(self): class TestNumObsLinkage: - def test_num_obs_linkage_empty(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_num_obs_linkage_empty(self, xp): # Tests num_obs_linkage(Z) with empty linkage. - Z = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, num_obs_linkage, Z) - def test_num_obs_linkage_1x4(self): + @array_api_compatible + def test_num_obs_linkage_1x4(self, xp): # Tests num_obs_linkage(Z) on linkage over 2 observations. - Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64) assert_equal(num_obs_linkage(Z), 2) - def test_num_obs_linkage_2x4(self): + @array_api_compatible + def test_num_obs_linkage_2x4(self, xp): # Tests num_obs_linkage(Z) on linkage over 3 observations. - Z = np.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2], + [3, 2, 4.0, 3]], dtype=xp.float64) assert_equal(num_obs_linkage(Z), 3) - def test_num_obs_linkage_4_and_up(self): + @array_api_compatible + def test_num_obs_linkage_4_and_up(self, xp): # Tests num_obs_linkage(Z) on linkage on observation sets between sizes # 4 and 15 (step size 3). for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_equal(num_obs_linkage(Z), i) class TestLeavesList: - def test_leaves_list_1x4(self): + @array_api_compatible + def test_leaves_list_1x4(self, xp): # Tests leaves_list(Z) on a 1x4 linkage. - Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64) to_tree(Z) - assert_equal(leaves_list(Z), [0, 1]) + assert_allclose(leaves_list(Z), [0, 1], rtol=1e-15) - def test_leaves_list_2x4(self): + @array_api_compatible + def test_leaves_list_2x4(self, xp): # Tests leaves_list(Z) on a 2x4 linkage. 
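For context, the small fixtures reused by these validity and counting tests follow the standard linkage layout: each row of `Z` is `[cluster_i, cluster_j, merge_distance, count]`, and row `k` creates cluster `n + k`, so a linkage with `r` rows always encodes `r + 1` observations. In plain NumPy:

```python
import numpy as np
from scipy.cluster.hierarchy import is_valid_linkage, num_obs_linkage

Z = np.asarray([[0, 1, 3.0, 2],    # merge observations 0 and 1 at height 3.0
                [3, 2, 4.0, 3]],   # merge cluster 3 (the first merge) with obs 2
               dtype=np.float64)
assert is_valid_linkage(Z)
assert num_obs_linkage(Z) == 3     # two rows -> three observations
```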
- Z = np.asarray([[0, 1, 3.0, 2], - [3, 2, 4.0, 3]], dtype=np.double) + Z = xp.asarray([[0, 1, 3.0, 2], + [3, 2, 4.0, 3]], dtype=xp.float64) to_tree(Z) - assert_equal(leaves_list(Z), [0, 1, 2]) + assert_allclose(leaves_list(Z), [0, 1, 2], rtol=1e-15) - def test_leaves_list_Q(self): + @array_api_compatible + def test_leaves_list_Q(self, xp): for method in ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']: - self.check_leaves_list_Q(method) + self.check_leaves_list_Q(method, xp) - def check_leaves_list_Q(self, method): + def check_leaves_list_Q(self, method, xp): # Tests leaves_list(Z) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) node = to_tree(Z) - assert_equal(node.pre_order(), leaves_list(Z)) + assert_allclose(node.pre_order(), leaves_list(Z), rtol=1e-15) - def test_Q_subtree_pre_order(self): + @array_api_compatible + def test_Q_subtree_pre_order(self, xp): # Tests that pre_order() works when called on sub-trees. - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, 'single') node = to_tree(Z) - assert_equal(node.pre_order(), (node.get_left().pre_order() - + node.get_right().pre_order())) + assert_allclose(node.pre_order(), (node.get_left().pre_order() + + node.get_right().pre_order()), + rtol=1e-15) class TestCorrespond: - def test_correspond_empty(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_correspond_empty(self, xp): # Tests correspond(Z, y) with empty linkage and condensed distance matrix. - y = np.zeros((0,)) - Z = np.zeros((0,4)) + y = xp.zeros((0,), dtype=xp.float64) + Z = xp.zeros((0,4), dtype=xp.float64) assert_raises(ValueError, correspond, Z, y) - def test_correspond_2_and_up(self): + @array_api_compatible + def test_correspond_2_and_up(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. for i in range(2, 4): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_(correspond(Z, y)) for i in range(4, 15, 3): y = np.random.rand(i*(i-1)//2) + y = xp.asarray(y) Z = linkage(y) assert_(correspond(Z, y)) - def test_correspond_4_and_up(self): + @array_api_compatible + def test_correspond_4_and_up(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. Correspondence should be false. for (i, j) in (list(zip(list(range(2, 4)), list(range(3, 5)))) + list(zip(list(range(3, 5)), list(range(2, 4))))): y = np.random.rand(i*(i-1)//2) y2 = np.random.rand(j*(j-1)//2) + y = xp.asarray(y) + y2 = xp.asarray(y2) Z = linkage(y) Z2 = linkage(y2) assert_equal(correspond(Z, y2), False) assert_equal(correspond(Z2, y), False) - def test_correspond_4_and_up_2(self): + @array_api_compatible + def test_correspond_4_and_up_2(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. Correspondence should be false. for (i, j) in (list(zip(list(range(2, 7)), list(range(16, 21)))) + list(zip(list(range(2, 7)), list(range(16, 21))))): y = np.random.rand(i*(i-1)//2) y2 = np.random.rand(j*(j-1)//2) + y = xp.asarray(y) + y2 = xp.asarray(y2) Z = linkage(y) Z2 = linkage(y2) assert_equal(correspond(Z, y2), False) assert_equal(correspond(Z2, y), False) - def test_num_obs_linkage_multi_matrix(self): + @array_api_compatible + def test_num_obs_linkage_multi_matrix(self, xp): # Tests num_obs_linkage with observation matrices of multiple sizes. 
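As a compact restatement of the equivalence `check_leaves_list_Q` asserts per backend, run here on plain NumPy with made-up data:

```python
import numpy as np
from scipy.cluster.hierarchy import linkage, leaves_list, to_tree

rng = np.random.default_rng(1234)
X = rng.random((10, 3))
Z = linkage(X, 'single')
# leaves_list(Z) is defined to match a pre-order walk of the cluster tree
assert to_tree(Z).pre_order() == list(leaves_list(Z))
```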
for n in range(2, 10): X = np.random.rand(n, 4) Y = pdist(X) + Y = xp.asarray(Y) Z = linkage(Y) assert_equal(num_obs_linkage(Z), n) class TestIsMonotonic: - def test_is_monotonic_empty(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_is_monotonic_empty(self, xp): # Tests is_monotonic(Z) on an empty linkage. - Z = np.zeros((0, 4)) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, is_monotonic, Z) - def test_is_monotonic_1x4(self): + @array_api_compatible + def test_is_monotonic_1x4(self, xp): # Tests is_monotonic(Z) on 1x4 linkage. Expecting True. - Z = np.asarray([[0, 1, 0.3, 2]], dtype=np.double) - assert_equal(is_monotonic(Z), True) + Z = xp.asarray([[0, 1, 0.3, 2]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_2x4_T(self): + @array_api_compatible + def test_is_monotonic_2x4_T(self, xp): # Tests is_monotonic(Z) on 2x4 linkage. Expecting True. - Z = np.asarray([[0, 1, 0.3, 2], - [2, 3, 0.4, 3]], dtype=np.double) - assert_equal(is_monotonic(Z), True) + Z = xp.asarray([[0, 1, 0.3, 2], + [2, 3, 0.4, 3]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_2x4_F(self): + @array_api_compatible + def test_is_monotonic_2x4_F(self, xp): # Tests is_monotonic(Z) on 2x4 linkage. Expecting False. - Z = np.asarray([[0, 1, 0.4, 2], - [2, 3, 0.3, 3]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + Z = xp.asarray([[0, 1, 0.4, 2], + [2, 3, 0.3, 3]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_3x4_T(self): + @array_api_compatible + def test_is_monotonic_3x4_T(self, xp): # Tests is_monotonic(Z) on 3x4 linkage. Expecting True. - Z = np.asarray([[0, 1, 0.3, 2], + Z = xp.asarray([[0, 1, 0.3, 2], [2, 3, 0.4, 2], - [4, 5, 0.6, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), True) + [4, 5, 0.6, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_3x4_F1(self): + @array_api_compatible + def test_is_monotonic_3x4_F1(self, xp): # Tests is_monotonic(Z) on 3x4 linkage (case 1). Expecting False. - Z = np.asarray([[0, 1, 0.3, 2], + Z = xp.asarray([[0, 1, 0.3, 2], [2, 3, 0.2, 2], - [4, 5, 0.6, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + [4, 5, 0.6, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_3x4_F2(self): + @array_api_compatible + def test_is_monotonic_3x4_F2(self, xp): # Tests is_monotonic(Z) on 3x4 linkage (case 2). Expecting False. - Z = np.asarray([[0, 1, 0.8, 2], + Z = xp.asarray([[0, 1, 0.8, 2], [2, 3, 0.4, 2], - [4, 5, 0.6, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + [4, 5, 0.6, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_3x4_F3(self): + @array_api_compatible + def test_is_monotonic_3x4_F3(self, xp): # Tests is_monotonic(Z) on 3x4 linkage (case 3). Expecting False - Z = np.asarray([[0, 1, 0.3, 2], + Z = xp.asarray([[0, 1, 0.3, 2], [2, 3, 0.4, 2], - [4, 5, 0.2, 4]], dtype=np.double) - assert_equal(is_monotonic(Z), False) + [4, 5, 0.2, 4]], dtype=xp.float64) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_tdist_linkage1(self): + @array_api_compatible + def test_is_monotonic_tdist_linkage1(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # tdist data set. Expecting True. 
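The monotonicity property probed by this class is simply that merge heights (column 2 of `Z`) never decrease from a child merge to its parent. The two 2x4 fixtures above, replayed in plain NumPy:

```python
import numpy as np
from scipy.cluster.hierarchy import is_monotonic

Z_ok = np.asarray([[0, 1, 0.3, 2],
                   [2, 3, 0.4, 3]], dtype=np.float64)   # heights 0.3 -> 0.4
Z_bad = np.asarray([[0, 1, 0.4, 2],
                    [2, 3, 0.3, 3]], dtype=np.float64)  # heights 0.4 -> 0.3
assert is_monotonic(Z_ok) and not is_monotonic(Z_bad)
```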
- Z = linkage(hierarchy_test_data.ytdist, 'single') - assert_equal(is_monotonic(Z), True) + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') + assert_allclose(is_monotonic(Z), True) - def test_is_monotonic_tdist_linkage2(self): + @array_api_compatible + def test_is_monotonic_tdist_linkage2(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # tdist data set. Perturbing. Expecting False. - Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') Z[2,2] = 0.0 - assert_equal(is_monotonic(Z), False) + assert_allclose(is_monotonic(Z), False) - def test_is_monotonic_Q_linkage(self): + @array_api_compatible + def test_is_monotonic_Q_linkage(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # Q data set. Expecting True. - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, 'single') - assert_equal(is_monotonic(Z), True) + assert_allclose(is_monotonic(Z), True) class TestMaxDists: - def test_maxdists_empty_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxdists_empty_linkage(self, xp): # Tests maxdists(Z) on empty linkage. Expecting exception. - Z = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxdists, Z) - def test_maxdists_one_cluster_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxdists_one_cluster_linkage(self, xp): # Tests maxdists(Z) on linkage with one cluster. - Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) MD = maxdists(Z) - expectedMD = calculate_maximum_distances(Z) + expectedMD = calculate_maximum_distances(Z, xp) assert_allclose(MD, expectedMD, atol=1e-15) - def test_maxdists_Q_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxdists_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: - self.check_maxdists_Q_linkage(method) + self.check_maxdists_Q_linkage(method, xp) - def check_maxdists_Q_linkage(self, method): + def check_maxdists_Q_linkage(self, method, xp): # Tests maxdists(Z) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) MD = maxdists(Z) - expectedMD = calculate_maximum_distances(Z) + expectedMD = calculate_maximum_distances(Z, xp) assert_allclose(MD, expectedMD, atol=1e-15) class TestMaxInconsts: - def test_maxinconsts_empty_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxinconsts_empty_linkage(self, xp): # Tests maxinconsts(Z, R) on empty linkage. Expecting exception. - Z = np.zeros((0, 4), dtype=np.double) - R = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) + R = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxinconsts, Z, R) - def test_maxinconsts_difrow_linkage(self): + @array_api_compatible + def test_maxinconsts_difrow_linkage(self, xp): # Tests maxinconsts(Z, R) on linkage and inconsistency matrices with # different numbers of clusters. Expecting exception. 
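For reference, the `R` passed alongside `Z` in the maxinconsts/maxRstat tests is the output of `inconsistent`, with one row per merge holding `[mean height, height std, link count, inconsistency coefficient]`. A minimal NumPy run showing the shapes involved (data made up):

```python
import numpy as np
from scipy.cluster.hierarchy import linkage, inconsistent, maxinconsts

y = np.asarray([2., 8., 4., 6., 10., 3.])
Z = linkage(y, 'single')
R = inconsistent(Z)
assert R.shape == (Z.shape[0], 4)
# maxinconsts reduces the coefficient column over each subtree
assert maxinconsts(Z, R).shape == (Z.shape[0],)
```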
- Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) R = np.random.rand(2, 4) + R = xp.asarray(R) assert_raises(ValueError, maxinconsts, Z, R) - def test_maxinconsts_one_cluster_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxinconsts_one_cluster_linkage(self, xp): # Tests maxinconsts(Z, R) on linkage with one cluster. - Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) - R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) + R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64) MD = maxinconsts(Z, R) - expectedMD = calculate_maximum_inconsistencies(Z, R) + expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp) assert_allclose(MD, expectedMD, atol=1e-15) - def test_maxinconsts_Q_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxinconsts_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: - self.check_maxinconsts_Q_linkage(method) + self.check_maxinconsts_Q_linkage(method, xp) - def check_maxinconsts_Q_linkage(self, method): + def check_maxinconsts_Q_linkage(self, method, xp): # Tests maxinconsts(Z, R) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) R = inconsistent(Z) MD = maxinconsts(Z, R) - expectedMD = calculate_maximum_inconsistencies(Z, R) + expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp) assert_allclose(MD, expectedMD, atol=1e-15) class TestMaxRStat: - def test_maxRstat_invalid_index(self): + @array_api_compatible + def test_maxRstat_invalid_index(self, xp): for i in [3.3, -1, 4]: - self.check_maxRstat_invalid_index(i) + self.check_maxRstat_invalid_index(i, xp) - def check_maxRstat_invalid_index(self, i): + def check_maxRstat_invalid_index(self, i, xp): # Tests maxRstat(Z, R, i). Expecting exception. - Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) - R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) + R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64) if isinstance(i, int): assert_raises(ValueError, maxRstat, Z, R, i) else: assert_raises(TypeError, maxRstat, Z, R, i) - def test_maxRstat_empty_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxRstat_empty_linkage(self, xp): for i in range(4): - self.check_maxRstat_empty_linkage(i) + self.check_maxRstat_empty_linkage(i, xp) - def check_maxRstat_empty_linkage(self, i): + def check_maxRstat_empty_linkage(self, i, xp): # Tests maxRstat(Z, R, i) on empty linkage. Expecting exception. - Z = np.zeros((0, 4), dtype=np.double) - R = np.zeros((0, 4), dtype=np.double) + Z = xp.zeros((0, 4), dtype=xp.float64) + R = xp.zeros((0, 4), dtype=xp.float64) assert_raises(ValueError, maxRstat, Z, R, i) - def test_maxRstat_difrow_linkage(self): + @array_api_compatible + def test_maxRstat_difrow_linkage(self, xp): for i in range(4): - self.check_maxRstat_difrow_linkage(i) + self.check_maxRstat_difrow_linkage(i, xp) - def check_maxRstat_difrow_linkage(self, i): + def check_maxRstat_difrow_linkage(self, i, xp): # Tests maxRstat(Z, R, i) on linkage and inconsistency matrices with # different numbers of clusters. Expecting exception. 
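The `check_maxRstat_invalid_index` cases above pin down `maxRstat`'s argument contract: `i` selects one of `R`'s four columns, so it must be a plain `int` in `[0, 3]`. Demonstrated directly:

```python
import numpy as np
import pytest
from scipy.cluster.hierarchy import maxRstat

Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.float64)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.float64)
with pytest.raises(ValueError):
    maxRstat(Z, R, 4)     # integer out of range
with pytest.raises(TypeError):
    maxRstat(Z, R, 3.3)   # not an integer at all
```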
- Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) R = np.random.rand(2, 4) + R = xp.asarray(R) assert_raises(ValueError, maxRstat, Z, R, i) - def test_maxRstat_one_cluster_linkage(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_maxRstat_one_cluster_linkage(self, xp): for i in range(4): - self.check_maxRstat_one_cluster_linkage(i) + self.check_maxRstat_one_cluster_linkage(i, xp) - def check_maxRstat_one_cluster_linkage(self, i): + def check_maxRstat_one_cluster_linkage(self, i, xp): # Tests maxRstat(Z, R, i) on linkage with one cluster. - Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double) - R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double) + Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64) + R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64) MD = maxRstat(Z, R, 1) - expectedMD = calculate_maximum_inconsistencies(Z, R, 1) + expectedMD = calculate_maximum_inconsistencies(Z, R, 1, xp) assert_allclose(MD, expectedMD, atol=1e-15) - def test_maxRstat_Q_linkage(self): + @array_api_compatible + def test_maxRstat_Q_linkage(self, xp): for method in ['single', 'complete', 'ward', 'centroid', 'median']: for i in range(4): - self.check_maxRstat_Q_linkage(method, i) + self.check_maxRstat_Q_linkage(method, i, xp) - def check_maxRstat_Q_linkage(self, method, i): + def check_maxRstat_Q_linkage(self, method, i, xp): # Tests maxRstat(Z, R, i) on the Q data set - X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Z = linkage(X, method) R = inconsistent(Z) MD = maxRstat(Z, R, 1) @@ -801,28 +922,32 @@ def check_maxRstat_Q_linkage(self, method, i): class TestDendrogram: - def test_dendrogram_single_linkage_tdist(self): + @array_api_compatible + def test_dendrogram_single_linkage_tdist(self, xp): # Tests dendrogram calculation on single linkage of the tdist data set. 
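A note on why the dendrogram tests below keep converting results back to NumPy: `dendrogram(..., no_plot=True)` returns a plain dict of Python lists (`'icoord'`, `'dcoord'`, `'ivl'`, `'leaves'`, `'color_list'`, ...) regardless of which backend produced `Z`, so the assertions wrap `'dcoord'` in `np.asarray` before comparing. For instance:

```python
import numpy as np
from scipy.cluster.hierarchy import linkage, dendrogram

y = np.asarray([2., 8., 4., 6., 10., 3.])   # made-up condensed distances
Z = linkage(y, 'single')
R = dendrogram(Z, no_plot=True)
assert sorted(R['leaves']) == [0, 1, 2, 3]  # a permutation of the observations
```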
- Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') R = dendrogram(Z, no_plot=True) leaves = R["leaves"] assert_equal(leaves, [2, 5, 1, 0, 3, 4]) - def test_valid_orientation(self): - Z = linkage(hierarchy_test_data.ytdist, 'single') + @array_api_compatible + def test_valid_orientation(self, xp): + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') assert_raises(ValueError, dendrogram, Z, orientation="foo") - def test_labels_as_array_or_list(self): + @array_api_compatible + def test_labels_as_array_or_list(self, xp): # test for gh-12418 - Z = linkage(hierarchy_test_data.ytdist, 'single') - labels = np.array([1, 3, 2, 6, 4, 5]) + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') + labels = xp.asarray([1, 3, 2, 6, 4, 5]) result1 = dendrogram(Z, labels=labels, no_plot=True) - result2 = dendrogram(Z, labels=labels.tolist(), no_plot=True) + result2 = dendrogram(Z, labels=list(labels), no_plot=True) assert result1 == result2 + @array_api_compatible @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") - def test_valid_label_size(self): - link = np.array([ + def test_valid_label_size(self, xp): + link = xp.asarray([ [0, 1, 1.0, 4], [2, 3, 1.0, 5], [4, 5, 2.0, 6], @@ -840,14 +965,15 @@ def test_valid_label_size(self): plt.close() + @array_api_compatible @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") - def test_dendrogram_plot(self): + def test_dendrogram_plot(self, xp): for orientation in ['top', 'bottom', 'left', 'right']: - self.check_dendrogram_plot(orientation) + self.check_dendrogram_plot(orientation, xp) - def check_dendrogram_plot(self, orientation): + def check_dendrogram_plot(self, orientation, xp): # Tests dendrogram plotting. - Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') expected = {'color_list': ['C1', 'C0', 'C0', 'C0', 'C0'], 'dcoord': [[0.0, 138.0, 138.0, 0.0], [0.0, 219.0, 219.0, 0.0], @@ -869,6 +995,7 @@ def check_dendrogram_plot(self, orientation): # test that dendrogram accepts ax keyword R1 = dendrogram(Z, ax=ax, orientation=orientation) + R1['dcoord'] = np.asarray(R1['dcoord']) assert_equal(R1, expected) # test that dendrogram accepts and handle the leaf_font_size and @@ -880,8 +1007,8 @@ def check_dendrogram_plot(self, orientation): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_equal(testlabel.get_rotation(), 90) - assert_equal(testlabel.get_size(), 20) + assert_allclose(testlabel.get_rotation(), 90, rtol=1e-15) + assert_allclose(testlabel.get_size(), 20, rtol=1e-15) dendrogram(Z, ax=ax, orientation=orientation, leaf_rotation=90) testlabel = ( @@ -889,7 +1016,7 @@ def check_dendrogram_plot(self, orientation): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_equal(testlabel.get_rotation(), 90) + assert_allclose(testlabel.get_rotation(), 90, rtol=1e-15) dendrogram(Z, ax=ax, orientation=orientation, leaf_font_size=20) testlabel = ( @@ -897,20 +1024,23 @@ def check_dendrogram_plot(self, orientation): if orientation in ['top', 'bottom'] else ax.get_yticklabels()[0] ) - assert_equal(testlabel.get_size(), 20) + assert_allclose(testlabel.get_size(), 20, rtol=1e-15) plt.close() # test plotting to gca (will import pylab) R2 = dendrogram(Z, orientation=orientation) plt.close() + R2['dcoord'] = np.asarray(R2['dcoord']) assert_equal(R2, expected) + @array_api_compatible @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") - def 
test_dendrogram_truncate_mode(self): - Z = linkage(hierarchy_test_data.ytdist, 'single') + def test_dendrogram_truncate_mode(self, xp): + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') R = dendrogram(Z, 2, 'lastp', show_contracted=True) plt.close() + R['dcoord'] = np.asarray(R['dcoord']) assert_equal(R, {'color_list': ['C0'], 'dcoord': [[0.0, 295.0, 295.0, 0.0]], 'icoord': [[5.0, 5.0, 15.0, 15.0]], @@ -921,6 +1051,7 @@ def test_dendrogram_truncate_mode(self): R = dendrogram(Z, 2, 'mtica', show_contracted=True) plt.close() + R['dcoord'] = np.asarray(R['dcoord']) assert_equal(R, {'color_list': ['C1', 'C0', 'C0', 'C0'], 'dcoord': [[0.0, 138.0, 138.0, 0.0], [0.0, 255.0, 255.0, 0.0], @@ -935,9 +1066,10 @@ def test_dendrogram_truncate_mode(self): 'leaves_color_list': ['C1', 'C1', 'C0', 'C0', 'C0'], }) - def test_dendrogram_colors(self): + @array_api_compatible + def test_dendrogram_colors(self, xp): # Tests dendrogram plots with alternate colors - Z = linkage(hierarchy_test_data.ytdist, 'single') + Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') set_link_color_palette(['c', 'm', 'y', 'k']) R = dendrogram(Z, no_plot=True, @@ -950,30 +1082,32 @@ def test_dendrogram_colors(self): # reset color palette (global list) set_link_color_palette(None) - def test_dendrogram_leaf_colors_zero_dist(self): + @array_api_compatible + def test_dendrogram_leaf_colors_zero_dist(self, xp): # tests that the colors of leafs are correct for tree # with two identical points - x = np.array([[1, 0, 0], - [0, 0, 1], - [0, 2, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 0]]) + x = xp.asarray([[1, 0, 0], + [0, 0, 1], + [0, 2, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 0]]) z = linkage(x, "single") d = dendrogram(z, no_plot=True) exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2'] colors = d["leaves_color_list"] assert_equal(colors, exp_colors) - def test_dendrogram_leaf_colors(self): + @array_api_compatible + def test_dendrogram_leaf_colors(self, xp): # tests that the colors are correct for a tree # with two near points ((0, 0, 1.1) and (0, 0, 1)) - x = np.array([[1, 0, 0], - [0, 0, 1.1], - [0, 2, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 0]]) + x = xp.asarray([[1, 0, 0], + [0, 0, 1.1], + [0, 2, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 0]]) z = linkage(x, "single") d = dendrogram(z, no_plot=True) exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2'] @@ -981,62 +1115,67 @@ def test_dendrogram_leaf_colors(self): assert_equal(colors, exp_colors) -def calculate_maximum_distances(Z): +def calculate_maximum_distances(Z, xp): # Used for testing correctness of maxdists. n = Z.shape[0] + 1 - B = np.zeros((n-1,)) - q = np.zeros((3,)) + B = xp.zeros((n-1,)) + q = xp.zeros((3,)) for i in range(0, n - 1): q[:] = 0.0 left = Z[i, 0] right = Z[i, 1] if left >= n: - q[0] = B[int(left) - n] + q[0] = B[xp.asarray(left, dtype=xp.int64) - n] if right >= n: - q[1] = B[int(right) - n] + q[1] = B[xp.asarray(right, dtype=xp.int64) - n] q[2] = Z[i, 2] - B[i] = q.max() + B[i] = xp.max(q) return B -def calculate_maximum_inconsistencies(Z, R, k=3): +def calculate_maximum_inconsistencies(Z, R, k=3, xp=np): # Used for testing correctness of maxinconsts. 
n = Z.shape[0] + 1 - B = np.zeros((n-1,)) - q = np.zeros((3,)) + B = xp.zeros((n-1,)) + q = xp.zeros((3,)) for i in range(0, n - 1): q[:] = 0.0 left = Z[i, 0] right = Z[i, 1] if left >= n: - q[0] = B[int(left) - n] + q[0] = B[xp.asarray(left, dtype=xp.int64) - n] if right >= n: - q[1] = B[int(right) - n] + q[1] = B[xp.asarray(right, dtype=xp.int64) - n] q[2] = R[i, k] - B[i] = q.max() + B[i] = xp.max(q) return B -def test_unsupported_uncondensed_distance_matrix_linkage_warning(): - assert_warns(ClusterWarning, linkage, [[0, 1], [1, 0]]) +@array_api_compatible +def test_unsupported_uncondensed_distance_matrix_linkage_warning(xp): + assert_warns(ClusterWarning, linkage, xp.asarray([[0, 1], [1, 0]])) -def test_euclidean_linkage_value_error(): +@array_api_compatible +def test_euclidean_linkage_value_error(xp): for method in scipy.cluster.hierarchy._EUCLIDEAN_METHODS: - assert_raises(ValueError, linkage, [[1, 1], [1, 1]], + assert_raises(ValueError, linkage, xp.asarray([[1, 1], [1, 1]]), method=method, metric='cityblock') -def test_2x2_linkage(): - Z1 = linkage([1], method='single', metric='euclidean') - Z2 = linkage([[0, 1], [0, 0]], method='single', metric='euclidean') - assert_allclose(Z1, Z2) +@array_api_compatible +def test_2x2_linkage(xp): + Z1 = linkage(xp.asarray([1]), method='single', metric='euclidean') + Z2 = linkage(xp.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean') + assert_allclose(Z1, Z2, rtol=1e-15) -def test_node_compare(): +@array_api_compatible +def test_node_compare(xp): np.random.seed(23) nobs = 50 X = np.random.randn(nobs, 4) + X = xp.asarray(X) Z = scipy.cluster.hierarchy.ward(X) tree = to_tree(Z) assert_(tree > tree.get_left()) @@ -1045,46 +1184,52 @@ def test_node_compare(): assert_(tree.get_right() != tree.get_left()) -def test_cut_tree(): +@skip_if_array_api_gpu +@array_api_compatible +@skip_if_array_api_backend('numpy.array_api') +def test_cut_tree(xp): np.random.seed(23) nobs = 50 X = np.random.randn(nobs, 4) + X = xp.asarray(X) Z = scipy.cluster.hierarchy.ward(X) cutree = cut_tree(Z) - assert_equal(cutree[:, 0], np.arange(nobs)) - assert_equal(cutree[:, -1], np.zeros(nobs)) - assert_equal(cutree.max(0), np.arange(nobs - 1, -1, -1)) + assert_allclose(cutree[:, 0], xp.arange(nobs), rtol=1e-15) + assert_allclose(cutree[:, -1], xp.zeros(nobs), rtol=1e-15) + assert_equal(np.asarray(cutree).max(0), np.arange(nobs - 1, -1, -1)) - assert_equal(cutree[:, [-5]], cut_tree(Z, n_clusters=5)) - assert_equal(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10])) - assert_equal(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5])) + assert_allclose(cutree[:, [-5]], cut_tree(Z, n_clusters=5), rtol=1e-15) + assert_allclose(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10]), rtol=1e-15) + assert_allclose(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5]), rtol=1e-15) nodes = _order_cluster_tree(Z) - heights = np.array([node.dist for node in nodes]) + heights = xp.asarray([node.dist for node in nodes]) - assert_equal(cutree[:, np.searchsorted(heights, [5])], - cut_tree(Z, height=5)) - assert_equal(cutree[:, np.searchsorted(heights, [5, 10])], - cut_tree(Z, height=[5, 10])) - assert_equal(cutree[:, np.searchsorted(heights, [10, 5])], - cut_tree(Z, height=[10, 5])) + assert_allclose(cutree[:, np.searchsorted(heights, [5])], + cut_tree(Z, height=5), rtol=1e-15) + assert_allclose(cutree[:, np.searchsorted(heights, [5, 10])], + cut_tree(Z, height=[5, 10]), rtol=1e-15) + assert_allclose(cutree[:, np.searchsorted(heights, [10, 5])], + cut_tree(Z, height=[10, 5]), 
rtol=1e-15) -def test_optimal_leaf_ordering(): +@array_api_compatible +def test_optimal_leaf_ordering(xp): # test with the distance vector y - Z = optimal_leaf_ordering(linkage(hierarchy_test_data.ytdist), - hierarchy_test_data.ytdist) + Z = optimal_leaf_ordering(linkage(xp.asarray(hierarchy_test_data.ytdist)), + xp.asarray(hierarchy_test_data.ytdist)) expectedZ = hierarchy_test_data.linkage_ytdist_single_olo assert_allclose(Z, expectedZ, atol=1e-10) # test with the observation matrix X - Z = optimal_leaf_ordering(linkage(hierarchy_test_data.X, 'ward'), - hierarchy_test_data.X) + Z = optimal_leaf_ordering(linkage(xp.asarray(hierarchy_test_data.X), 'ward'), + xp.asarray(hierarchy_test_data.X)) expectedZ = hierarchy_test_data.linkage_X_ward_olo assert_allclose(Z, expectedZ, atol=1e-06) +@skip_if_array_api def test_Heap(): values = np.array([2, -1, 0, -1.5, 3]) heap = Heap(values) diff --git a/scipy/cluster/tests/test_vq.py b/scipy/cluster/tests/test_vq.py index 6696ce51556d..3c852733540d 100644 --- a/scipy/cluster/tests/test_vq.py +++ b/scipy/cluster/tests/test_vq.py @@ -11,7 +11,14 @@ from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten, ClusterError, _krandinit) from scipy.cluster import _vq +from scipy.conftest import ( + array_api_compatible, + skip_if_array_api, + skip_if_array_api_gpu, + skip_if_array_api_backend, +) from scipy.sparse._sputils import matrix +from scipy._lib._array_api import SCIPY_ARRAY_API, as_xparray TESTDATA_2D = np.array([ @@ -71,37 +78,51 @@ class TestWhiten: - def test_whiten(self): - desired = np.array([[5.08738849, 2.97091878], + @array_api_compatible + def test_whiten(self, xp): + desired = xp.asarray([[5.08738849, 2.97091878], [3.19909255, 0.69660580], [4.51041982, 0.02640918], [4.38567074, 0.95120889], [2.32191480, 1.63195503]]) - for tp in np.array, matrix: - obs = tp([[0.98744510, 0.82766775], - [0.62093317, 0.19406729], - [0.87545741, 0.00735733], - [0.85124403, 0.26499712], - [0.45067590, 0.45464607]]) + + obs = xp.asarray([[0.98744510, 0.82766775], + [0.62093317, 0.19406729], + [0.87545741, 0.00735733], + [0.85124403, 0.26499712], + [0.45067590, 0.45464607]]) + if "cupy" in xp.__name__: + import cupy as cp + cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) + else: assert_allclose(whiten(obs), desired, rtol=1e-5) - def test_whiten_zero_std(self): + @array_api_compatible + def test_whiten_zero_std(self, xp): desired = np.array([[0., 1.0, 2.86666544], [0., 1.0, 1.32460034], [0., 1.0, 3.74382172]]) - for tp in np.array, matrix: - obs = tp([[0., 1., 0.74109533], - [0., 1., 0.34243798], - [0., 1., 0.96785929]]) - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') + + obs = xp.asarray([[0., 1., 0.74109533], + [0., 1., 0.34243798], + [0., 1., 0.96785929]]) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + + if "cupy" in xp.__name__: + import cupy as cp + cp.testing.assert_allclose(whiten(obs), desired, rtol=1e-5) + else: assert_allclose(whiten(obs), desired, rtol=1e-5) - assert_equal(len(w), 1) - assert_(issubclass(w[-1].category, RuntimeWarning)) - def test_whiten_not_finite(self): - for tp in np.array, matrix: - for bad_value in np.nan, np.inf, -np.inf: + assert_equal(len(w), 1) + assert_(issubclass(w[-1].category, RuntimeWarning)) + + @array_api_compatible + def test_whiten_not_finite(self, xp): + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] + for tp in arrays: + for bad_value in xp.nan, xp.inf, -xp.inf: obs = tp([[0.98744510, bad_value], 
[0.62093317, 0.19406729], [0.87545741, 0.00735733], @@ -111,43 +132,57 @@ def test_whiten_not_finite(self): class TestVq: - def test_py_vq(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_py_vq(self, xp): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] + for tp in arrays: label1 = py_vq(tp(X), tp(initc))[0] assert_array_equal(label1, LABEL1) + @skip_if_array_api def test_vq(self): initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + for tp in [np.asarray, matrix]: label1, dist = _vq.vq(tp(X), tp(initc)) assert_array_equal(label1, LABEL1) tlabel1, tdist = vq(tp(X), tp(initc)) - def test_vq_1d(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_vq_1d(self, xp): # Test special rank 1 vq algo, python implementation. data = X[:, 0] initc = data[:3] a, b = _vq.vq(data, initc) + data = xp.asarray(data) + initc = xp.asarray(initc) ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis]) assert_array_equal(a, ta) assert_array_equal(b, tb) + @skip_if_array_api def test__vq_sametype(self): a = np.array([1.0, 2.0], dtype=np.float64) b = a.astype(np.float32) assert_raises(TypeError, _vq.vq, a, b) + @skip_if_array_api def test__vq_invalid_type(self): a = np.array([1, 2], dtype=int) assert_raises(TypeError, _vq.vq, a, a) - def test_vq_large_nfeat(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_vq_large_nfeat(self, xp): X = np.random.rand(20, 20) code_book = np.random.rand(3, 20) codes0, dis0 = _vq.vq(X, code_book) - codes1, dis1 = py_vq(X, code_book) + codes1, dis1 = py_vq( + xp.asarray(X), xp.asarray(code_book) + ) assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) @@ -155,22 +190,29 @@ def test_vq_large_nfeat(self): code_book = code_book.astype(np.float32) codes0, dis0 = _vq.vq(X, code_book) - codes1, dis1 = py_vq(X, code_book) + codes1, dis1 = py_vq( + xp.asarray(X), xp.asarray(code_book) + ) assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) - def test_vq_large_features(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_vq_large_features(self, xp): X = np.random.rand(10, 5) * 1000000 code_book = np.random.rand(2, 5) * 1000000 codes0, dis0 = _vq.vq(X, code_book) - codes1, dis1 = py_vq(X, code_book) + codes1, dis1 = py_vq( + xp.asarray(X), xp.asarray(code_book) + ) assert_allclose(dis0, dis1, 1e-5) assert_array_equal(codes0, codes1) class TestKMean: - def test_large_features(self): + @array_api_compatible + def test_large_features(self, xp): # Generate a data set with large values, and run kmeans on it to # (regression for 1077). d = 300 @@ -185,21 +227,24 @@ def test_large_features(self): data[:x.shape[0]] = x data[x.shape[0]:] = y - kmeans(data, 2) + kmeans(xp.asarray(data), xp.asarray(2)) - def test_kmeans_simple(self): + @array_api_compatible + def test_kmeans_simple(self, xp): np.random.seed(54321) initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] + for tp in arrays: code1 = kmeans(tp(X), tp(initc), iter=1)[0] assert_array_almost_equal(code1, CODET2) - def test_kmeans_lost_cluster(self): + @array_api_compatible + def test_kmeans_lost_cluster(self, xp): # This will cause kmeans to have a cluster with no points. 
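The `arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix]` branching used in this file keeps the legacy `np.matrix` coverage alive without pushing it through the array API path, since `np.matrix` is not an array API object. The idiom in isolation (the function name is invented):

```python
import numpy as np
from scipy._lib._array_api import SCIPY_ARRAY_API
from scipy.sparse._sputils import matrix

def input_wrappers(xp):
    # legacy mode exercises both ndarray and np.matrix inputs; array API
    # mode only exercises the namespace under test
    return [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix]
```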
- data = TESTDATA_2D - initk = np.array([[-1.8127404, -0.67128041], - [2.04621601, 0.07401111], - [-2.31149087, -0.05160469]]) + data = xp.asarray(TESTDATA_2D) + initk = xp.asarray([[-1.8127404, -0.67128041], + [2.04621601, 0.07401111], + [-2.31149087, -0.05160469]]) kmeans(data, initk) with suppress_warnings() as sup: @@ -210,58 +255,70 @@ def test_kmeans_lost_cluster(self): assert_raises(ClusterError, kmeans2, data, initk, missing='raise') - def test_kmeans2_simple(self): + @array_api_compatible + def test_kmeans2_simple(self, xp): np.random.seed(12345678) - initc = np.concatenate([[X[0]], [X[1]], [X[2]]]) - for tp in np.array, matrix: + initc = xp.asarray(np.concatenate([[X[0]], [X[1]], [X[2]]])) + arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix] + for tp in arrays: code1 = kmeans2(tp(X), tp(initc), iter=1)[0] code2 = kmeans2(tp(X), tp(initc), iter=2)[0] assert_array_almost_equal(code1, CODET1) assert_array_almost_equal(code2, CODET2) - def test_kmeans2_rank1(self): - data = TESTDATA_2D + @array_api_compatible + def test_kmeans2_rank1(self, xp): + data = xp.asarray(TESTDATA_2D) data1 = data[:, 0] initc = data1[:3] - code = initc.copy() + code = as_xparray(initc, copy=True, xp=xp) kmeans2(data1, code, iter=1)[0] kmeans2(data1, code, iter=2)[0] - def test_kmeans2_rank1_2(self): - data = TESTDATA_2D + @array_api_compatible + @skip_if_array_api_backend('numpy.array_api') + def test_kmeans2_rank1_2(self, xp): + data = xp.asarray(TESTDATA_2D) data1 = data[:, 0] - kmeans2(data1, 2, iter=1) + kmeans2(data1, xp.asarray(2), iter=1) - def test_kmeans2_high_dim(self): + @array_api_compatible + def test_kmeans2_high_dim(self, xp): # test kmeans2 when the number of dimensions exceeds the number # of input points - data = TESTDATA_2D - data = data.reshape((20, 20))[:10] - kmeans2(data, 2) - - def test_kmeans2_init(self): + data = xp.asarray(TESTDATA_2D) + data = xp.reshape(data, (20, 20))[:10, :] + kmeans2(data, xp.asarray(2)) + + @skip_if_array_api_gpu + @array_api_compatible + @skip_if_array_api_backend('numpy.array_api') + def test_kmeans2_init(self, xp): np.random.seed(12345) - data = TESTDATA_2D + data = xp.asarray(TESTDATA_2D) + k = xp.asarray(3) - kmeans2(data, 3, minit='points') - kmeans2(data[:, :1], 3, minit='points') # special case (1-D) + kmeans2(data, k, minit='points') + kmeans2(data[:, :1], k, minit='points') # special case (1-D) - kmeans2(data, 3, minit='++') - kmeans2(data[:, :1], 3, minit='++') # special case (1-D) + kmeans2(data, k, minit='++') + kmeans2(data[:, :1], k, minit='++') # special case (1-D) # minit='random' can give warnings, filter those with suppress_warnings() as sup: sup.filter(message="One of the clusters is empty. 
Re-run.") - kmeans2(data, 3, minit='random') - kmeans2(data[:, :1], 3, minit='random') # special case (1-D) + kmeans2(data, k, minit='random') + kmeans2(data[:, :1], k, minit='random') # special case (1-D) + @array_api_compatible + @skip_if_array_api_backend('numpy.array_api') @pytest.mark.skipif(sys.platform == 'win32', reason='Fails with MemoryError in Wine.') - def test_krandinit(self): - data = TESTDATA_2D - datas = [data.reshape((200, 2)), data.reshape((20, 20))[:10]] + def test_krandinit(self, xp): + data = xp.asarray(TESTDATA_2D) + datas = [xp.reshape(data, (200, 2)), xp.reshape(data, (20, 20))[:10, :]] k = int(1e6) for data in datas: # check that np.random.Generator can be used (numpy >= 1.17) @@ -270,37 +327,44 @@ def test_krandinit(self): else: rng = np.random.RandomState(1234) - init = _krandinit(data, k, rng) - orig_cov = np.cov(data, rowvar=0) - init_cov = np.cov(init, rowvar=0) + init = _krandinit(data, k, rng, xp) + orig_cov = xp.cov(data.T) + init_cov = xp.cov(init.T) assert_allclose(orig_cov, init_cov, atol=1e-2) - def test_kmeans2_empty(self): + @array_api_compatible + def test_kmeans2_empty(self, xp): # Regression test for gh-1032. - assert_raises(ValueError, kmeans2, [], 2) + assert_raises(ValueError, kmeans2, xp.asarray([]), xp.asarray(2)) + @skip_if_array_api def test_kmeans_0k(self): # Regression test for gh-1073: fail when k arg is 0. assert_raises(ValueError, kmeans, X, 0) assert_raises(ValueError, kmeans2, X, 0) assert_raises(ValueError, kmeans2, X, np.array([])) - def test_kmeans_large_thres(self): + @array_api_compatible + def test_kmeans_large_thres(self, xp): # Regression test for gh-1774 - x = np.array([1, 2, 3, 4, 10], dtype=float) - res = kmeans(x, 1, thresh=1e16) - assert_allclose(res[0], np.array([4.])) + x = xp.asarray([1, 2, 3, 4, 10], dtype=xp.float64) + res = kmeans(x, xp.asarray(1), thresh=1e16) + assert_allclose(res[0], xp.asarray([4.])) assert_allclose(res[1], 2.3999999999999999) - def test_kmeans2_kpp_low_dim(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_kmeans2_kpp_low_dim(self, xp): # Regression test for gh-11462 - prev_res = np.array([[-1.95266667, 0.898], - [-3.153375, 3.3945]]) + prev_res = xp.asarray([[-1.95266667, 0.898], + [-3.153375, 3.3945]]) np.random.seed(42) - res, _ = kmeans2(TESTDATA_2D, 2, minit='++') + res, _ = kmeans2(xp.asarray(TESTDATA_2D), xp.asarray(2), minit='++') assert_allclose(res, prev_res) - def test_kmeans2_kpp_high_dim(self): + @skip_if_array_api_gpu + @array_api_compatible + def test_kmeans2_kpp_high_dim(self, xp): # Regression test for gh-11462 n_dim = 100 size = 10 @@ -311,23 +375,25 @@ def test_kmeans2_kpp_high_dim(self): np.random.multivariate_normal(centers[0], np.eye(n_dim), size=size), np.random.multivariate_normal(centers[1], np.eye(n_dim), size=size) ]) - res, _ = kmeans2(data, 2, minit='++') + + data = xp.asarray(data) + res, _ = kmeans2(data, xp.asarray(2), minit='++') assert_array_almost_equal(res, centers, decimal=0) - def test_kmeans_diff_convergence(self): + @array_api_compatible + def test_kmeans_diff_convergence(self, xp): # Regression test for gh-8727 - obs = np.array([-3, -1, 0, 1, 1, 8], float) - res = kmeans(obs, np.array([-3., 0.99])) - assert_allclose(res[0], np.array([-0.4, 8.])) + obs = xp.asarray([-3, -1, 0, 1, 1, 8], dtype=xp.float64) + res = kmeans(obs, xp.asarray([-3., 0.99])) + assert_allclose(res[0], xp.asarray([-0.4, 8.])) assert_allclose(res[1], 1.0666666666666667) + @skip_if_array_api def test_kmeans_and_kmeans2_random_seed(self): - seed_list = [1234, 
np.random.RandomState(1234)] - - # check that np.random.Generator can be used (numpy >= 1.17) - if hasattr(np.random, 'default_rng'): - seed_list.append(np.random.default_rng(1234)) + seed_list = [ + 1234, np.random.RandomState(1234), np.random.default_rng(1234) + ] for seed in seed_list: # test for kmeans diff --git a/scipy/cluster/vq.py b/scipy/cluster/vq.py index 8770fb69fdc8..ea8664dd6880 100644 --- a/scipy/cluster/vq.py +++ b/scipy/cluster/vq.py @@ -67,8 +67,10 @@ import warnings import numpy as np from collections import deque -from scipy._lib._util import _asarray_validated, check_random_state,\ - rng_integers +from scipy._lib._array_api import ( + as_xparray, array_namespace, size, atleast_nd +) +from scipy._lib._util import check_random_state, rng_integers from scipy.spatial.distance import cdist from . import _vq @@ -129,10 +131,11 @@ def whiten(obs, check_finite=True): [ 1.75976538, 0.7038557 , 7.21248917]]) """ - obs = _asarray_validated(obs, check_finite=check_finite) - std_dev = obs.std(axis=0) + xp = array_namespace(obs) + obs = as_xparray(obs, check_finite=check_finite, xp=xp) + std_dev = xp.std(obs, axis=0) zero_std_mask = std_dev == 0 - if zero_std_mask.any(): + if xp.any(zero_std_mask): std_dev[zero_std_mask] = 1.0 warnings.warn("Some columns have standard deviation zero. " "The values of these columns will not change.", @@ -198,15 +201,19 @@ def vq(obs, code_book, check_finite=True): (array([1, 1, 0],'i'), array([ 0.43588989, 0.73484692, 0.83066239])) """ - obs = _asarray_validated(obs, check_finite=check_finite) - code_book = _asarray_validated(code_book, check_finite=check_finite) - ct = np.common_type(obs, code_book) - - c_obs = obs.astype(ct, copy=False) - c_code_book = code_book.astype(ct, copy=False) - - if np.issubdtype(ct, np.float64) or np.issubdtype(ct, np.float32): - return _vq.vq(c_obs, c_code_book) + xp = array_namespace(obs, code_book) + obs = as_xparray(obs, xp=xp, check_finite=check_finite) + code_book = as_xparray(code_book, xp=xp, check_finite=check_finite) + ct = xp.result_type(obs, code_book) + + c_obs = xp.astype(obs, ct, copy=False) + c_code_book = xp.astype(code_book, ct, copy=False) + + if xp.isdtype(ct, kind='real floating'): + c_obs = np.asarray(c_obs) + c_code_book = np.asarray(c_code_book) + result = _vq.vq(c_obs, c_code_book) + return xp.asarray(result[0]), xp.asarray(result[1]) return py_vq(obs, code_book, check_finite=False) @@ -247,23 +254,24 @@ def py_vq(obs, code_book, check_finite=True): It is about 20 times slower than the C version. """ - obs = _asarray_validated(obs, check_finite=check_finite) - code_book = _asarray_validated(code_book, check_finite=check_finite) + xp = array_namespace(obs, code_book) + obs = as_xparray(obs, xp=xp, check_finite=check_finite) + code_book = as_xparray(code_book, xp=xp, check_finite=check_finite) if obs.ndim != code_book.ndim: raise ValueError("Observation and code_book should have the same rank") if obs.ndim == 1: - obs = obs[:, np.newaxis] - code_book = code_book[:, np.newaxis] + obs = obs[:, xp.newaxis] + code_book = code_book[:, xp.newaxis] dist = cdist(obs, code_book) code = dist.argmin(axis=1) - min_dist = dist[np.arange(len(code)), code] + min_dist = dist[xp.arange(len(code)), code] return code, min_dist -def _kmeans(obs, guess, thresh=1e-5): +def _kmeans(obs, guess, thresh=1e-5, xp=None): """ "raw" version of k-means. 
Returns @@ -295,19 +303,25 @@ [ 0.73333333, 1.13333333]]), 0.40563916697728591) """ - - code_book = np.asarray(guess) - diff = np.inf + xp = np if xp is None else xp + code_book = guess + diff = xp.inf prev_avg_dists = deque([diff], maxlen=2) while diff > thresh: # compute membership and distances between obs and code_book obs_code, distort = vq(obs, code_book, check_finite=False) - prev_avg_dists.append(distort.mean(axis=-1)) + prev_avg_dists.append(xp.mean(distort, axis=-1)) # recalc code_book as centroids of associated obs + obs = np.asarray(obs) + obs_code = np.asarray(obs_code) code_book, has_members = _vq.update_cluster_means(obs, obs_code, code_book.shape[0]) + obs = xp.asarray(obs) + obs_code = xp.asarray(obs_code) + code_book = xp.asarray(code_book) + has_members = xp.asarray(has_members) code_book = code_book[has_members] - diff = np.absolute(prev_avg_dists[0] - prev_avg_dists[1]) + diff = xp.abs(prev_avg_dists[0] - prev_avg_dists[1]) return code_book, prev_avg_dists[1] @@ -447,21 +461,23 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, >>> plt.show() """ - obs = _asarray_validated(obs, check_finite=check_finite) + xp = array_namespace(obs, k_or_guess) + obs = as_xparray(obs, xp=xp, check_finite=check_finite) + guess = as_xparray(k_or_guess, xp=xp, check_finite=check_finite) if iter < 1: raise ValueError("iter must be at least 1, got %s" % iter) # Determine whether a count (scalar) or an initial guess (array) was passed. - if not np.isscalar(k_or_guess): - guess = _asarray_validated(k_or_guess, check_finite=check_finite) - if guess.size < 1: + if size(guess) != 1: + if size(guess) < 1: raise ValueError("Asked for 0 clusters. Initial book was %s" % guess) - return _kmeans(obs, guess, thresh=thresh) + return _kmeans(obs, guess, thresh=thresh, xp=xp) # k_or_guess is a scalar, now verify that it's an integer - k = int(k_or_guess) - if k != k_or_guess: + k = int(guess) + if k != guess: raise ValueError("If k_or_guess is a scalar, it must be an integer.") if k < 1: raise ValueError("Asked for %d clusters." % k) @@ -469,18 +485,18 @@ def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True, rng = check_random_state(seed) # initialize best distance value to a large value - best_dist = np.inf + best_dist = xp.inf for i in range(iter): # the initial code book is randomly selected from observations - guess = _kpoints(obs, k, rng) - book, dist = _kmeans(obs, guess, thresh=thresh) + guess = _kpoints(obs, k, rng, xp) + book, dist = _kmeans(obs, guess, thresh=thresh, xp=xp) if dist < best_dist: best_book = book best_dist = dist return best_book, best_dist -def _kpoints(data, k, rng): +def _kpoints(data, k, rng, xp): """Pick k points at random in data (one row = one observation). Parameters @@ -500,11 +516,11 @@ A 'k' by 'N' containing the initial centroids """ - idx = rng.choice(data.shape[0], size=k, replace=False) - return data[idx] + idx = rng.choice(data.shape[0], size=int(k), replace=False) + return data[idx, ...] -def _krandinit(data, k, rng): +def _krandinit(data, k, rng, xp): """Returns k samples of a random variable whose parameters depend on data.
More precisely, it returns k observations sampled from a Gaussian random @@ -527,31 +543,35 @@ def _krandinit(data, k, rng): A 'k' by 'N' containing the initial centroids """ - mu = data.mean(axis=0) + mu = xp.mean(data, axis=0) if data.ndim == 1: - cov = np.cov(data) + cov = xp.cov(data) x = rng.standard_normal(size=k) - x *= np.sqrt(cov) + x = xp.asarray(x) + x *= xp.sqrt(cov) elif data.shape[1] > data.shape[0]: # initialize when the covariance matrix is rank deficient - _, s, vh = np.linalg.svd(data - mu, full_matrices=False) - x = rng.standard_normal(size=(k, s.size)) - sVh = s[:, None] * vh / np.sqrt(data.shape[0] - 1) - x = x.dot(sVh) + _, s, vh = xp.linalg.svd(data - mu, full_matrices=False) + x = rng.standard_normal(size=(k, size(s))) + x = xp.asarray(x) + sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1.)) + x = xp.matmul(x, sVh) else: - cov = np.atleast_2d(np.cov(data, rowvar=False)) + # TODO ARRAY_API cov not supported + cov = atleast_nd(xp.cov(data.T), ndim=2, xp=xp) # k rows, d cols (one row = one obs) # Generate k sample of a random variable ~ Gaussian(mu, cov) - x = rng.standard_normal(size=(k, mu.size)) - x = x.dot(np.linalg.cholesky(cov).T) + x = rng.standard_normal(size=(k, size(mu))) + x = xp.asarray(x) + x = xp.matmul(x, xp.linalg.cholesky(cov).T) x += mu return x -def _kpp(data, k, rng): +def _kpp(data, k, rng, xp): """ Picks k points in the data based on the kmeans++ method. Parameters @@ -578,18 +598,25 @@ def _kpp(data, k, rng): """ dims = data.shape[1] if len(data.shape) > 1 else 1 - init = np.ndarray((k, dims)) + + # k should be an integer, NOT a NumPy + # scalar array thing... + if not isinstance(k, int): + k = xp.astype(k, xp.int64) + + init = xp.empty((k, dims)) for i in range(k): if i == 0: - init[i, :] = data[rng_integers(rng, data.shape[0])] + init[i, :] = data[rng_integers(rng, data.shape[0]), :] else: D2 = cdist(init[:i,:], data, metric='sqeuclidean').min(axis=0) probs = D2/D2.sum() cumprobs = probs.cumsum() r = rng.uniform() - init[i, :] = data[np.searchsorted(cumprobs, r)] + cumprobs = np.asarray(cumprobs) + init[i, :] = data[np.searchsorted(cumprobs, r), :] return init @@ -745,7 +772,9 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', except KeyError as e: raise ValueError(f"Unknown missing method {missing!r}") from e - data = _asarray_validated(data, check_finite=check_finite) + xp = array_namespace(data, k) + data = as_xparray(data, xp=xp, check_finite=check_finite) + code_book = as_xparray(k, xp=xp, copy=True) if data.ndim == 1: d = 1 elif data.ndim == 2: @@ -753,24 +782,23 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', else: raise ValueError("Input of rank > 2 is not supported.") - if data.size < 1: + if size(data) < 1 or size(code_book) < 1: raise ValueError("Empty input is not supported.") # If k is not a single value, it should be compatible with data's shape - if minit == 'matrix' or not np.isscalar(k): - code_book = np.array(k, copy=True) + if minit == 'matrix' or size(code_book) > 1: if data.ndim != code_book.ndim: raise ValueError("k array doesn't match data rank") - nc = len(code_book) + nc = code_book.shape[0] if data.ndim > 1 and code_book.shape[1] != d: raise ValueError("k array doesn't match data dimension") else: - nc = int(k) + nc = int(code_book) if nc < 1: raise ValueError("Cannot ask kmeans2 for %d clusters" - " (k was %s)" % (nc, k)) - elif nc != k: + " (k was %s)" % (nc, code_book)) + elif nc != code_book: warnings.warn("k was not an integer, was converted.") try: @@ -779,12 +807,14 @@ def 
kmeans2(data, k, iter=10, thresh=1e-5, minit='random', raise ValueError(f"Unknown init method {minit!r}") from e else: rng = check_random_state(seed) - code_book = init_meth(data, k, rng) + code_book = init_meth(data, code_book, rng, xp) for i in range(iter): # Compute the nearest neighbor for each obs using the current code book label = vq(data, code_book, check_finite=check_finite)[0] # Update the code book by computing centroids + data = np.asarray(data) + label = np.asarray(label) new_code_book, has_members = _vq.update_cluster_means(data, label, nc) if not has_members.all(): miss_meth() @@ -792,4 +822,4 @@ def kmeans2(data, k, iter=10, thresh=1e-5, minit='random', new_code_book[~has_members] = code_book[~has_members] code_book = new_code_book - return code_book, label + return xp.asarray(code_book), xp.asarray(label) diff --git a/scipy/conftest.py b/scipy/conftest.py index f6afc20a3d20..cc0e72dbc764 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -1,13 +1,17 @@ # Pytest customization +import json import os -import pytest import warnings import numpy as np +import numpy.array_api import numpy.testing as npt +import pytest + from scipy._lib._fpumode import get_fpu_mode from scipy._lib._testutils import FPUModeChangeWarning from scipy._lib import _pep440 +from scipy._lib._array_api import SCIPY_ARRAY_API, SCIPY_DEVICE def pytest_configure(config): @@ -93,3 +97,77 @@ def check_fpu_mode(request): warnings.warn("FPU mode changed from {:#x} to {:#x} during " "the test".format(old_mode, new_mode), category=FPUModeChangeWarning, stacklevel=0) + + +# Array API backend handling +xp_available_backends = {'numpy': np} + +if SCIPY_ARRAY_API and isinstance(SCIPY_ARRAY_API, str): + # fill the dict of backends with available libraries + xp_available_backends.update({'numpy.array_api': numpy.array_api}) + + try: + import torch # type: ignore[import] + xp_available_backends.update({'pytorch': torch}) + # can use `mps` or `cpu` + torch.set_default_device(SCIPY_DEVICE) + except ImportError: + pass + + try: + import cupy # type: ignore[import] + xp_available_backends.update({'cupy': cupy}) + except ImportError: + pass + + # by default, use all available backends + if SCIPY_ARRAY_API.lower() != "true": + SCIPY_ARRAY_API_ = json.loads(SCIPY_ARRAY_API) + + if 'all' in SCIPY_ARRAY_API_: + pass # same as True + else: + # only select a subset of backend by filtering out the dict + try: + xp_available_backends = { + backend: xp_available_backends[backend] + for backend in SCIPY_ARRAY_API_ + } + except KeyError: + msg = f"'--array-api-backend' must be in {xp_available_backends.keys()}" + raise ValueError(msg) + +if 'cupy' in xp_available_backends: + SCIPY_DEVICE = 'cuda' + +array_api_compatible = pytest.mark.parametrize("xp", xp_available_backends.values()) + +skip_if_array_api = pytest.mark.skipif( + SCIPY_ARRAY_API, + reason="do not run with Array API on", +) + +skip_if_array_api_gpu = pytest.mark.skipif( + SCIPY_ARRAY_API and SCIPY_DEVICE != 'cpu', + reason="do not run with Array API on and not on CPU", +) + + +def skip_if_array_api_backend(backend): + def wrapper(func): + reason = ( + f"do not run with Array API backend: {backend}" + ) + # method gets there as a function so we cannot use inspect.ismethod + if '.' 
in func.__qualname__: + def wrapped(self, *args, xp, **kwargs): + if xp.__name__ == backend: + pytest.skip(reason=reason) + return func(self, *args, xp, **kwargs) + else: + def wrapped(*args, xp, **kwargs): # type: ignore[misc] + if xp.__name__ == backend: + pytest.skip(reason=reason) + return func(*args, xp, **kwargs) + return wrapped + return wrapper
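To close, a usage sketch for the decorator just defined, stacked under `@array_api_compatible` the same way `test_cut_tree` does earlier in this diff, so that only the matching parametrized run is skipped:

```python
from scipy.conftest import array_api_compatible, skip_if_array_api_backend

@array_api_compatible
@skip_if_array_api_backend('numpy.array_api')
def test_needs_full_numpy(xp):
    # skipped only when xp is numpy.array_api; other backends still run
    a = xp.asarray([1.0, 2.0])
    assert a.shape == (2,)
```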