diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 03aa3a6516d6..cdb8743b1ec2 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -65,6 +65,7 @@ "amin", "average", "bincount", + "corrcoef", "correlate", "cov", "max", @@ -360,6 +361,127 @@ def bincount(x1, weights=None, minlength=0): return call_origin(numpy.bincount, x1, weights=weights, minlength=minlength) +def corrcoef(x, y=None, rowvar=True, *, dtype=None): + """ + Return Pearson product-moment correlation coefficients. + + For full documentation refer to :obj:`numpy.corrcoef`. + + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} + A 1-D or 2-D array containing multiple variables and observations. + Each row of `x` represents a variable, and each column a single + observation of all those variables. Also see `rowvar` below. + y : {None, dpnp.ndarray, usm_ndarray}, optional + An additional set of variables and observations. `y` has the same + shape as `x`. + Default: ``None``. + rowvar : {bool}, optional + If `rowvar` is ``True``, then each row represents a variable, + with observations in the columns. Otherwise, the relationship + is transposed: each column represents a variable, while the rows + contain observations. + Default: ``True``. + dtype : {None, dtype}, optional + Data-type of the result. + Default: ``None``. + + Returns + ------- + R : {dpnp.ndarray} + The correlation coefficient matrix of the variables. + + See Also + -------- + :obj:`dpnp.cov` : Covariance matrix. + + Examples + -------- + In this example we generate two random arrays, ``xarr`` and ``yarr``, and + compute the row-wise and column-wise Pearson correlation coefficients, + ``R``. Since `rowvar` is true by default, we first find the row-wise + Pearson correlation coefficients between the variables of ``xarr``. + + >>> import dpnp as np + >>> np.random.seed(123) + >>> xarr = np.random.rand(3, 3).astype(np.float32) + >>> xarr + array([[7.2858386e-17, 2.2066992e-02, 3.9520904e-01], + [4.8012391e-01, 5.9377134e-01, 4.5147297e-01], + [9.0728188e-01, 9.9387854e-01, 5.8399546e-01]], dtype=float32) + >>> R1 = np.corrcoef(xarr) + >>> R1 + array([[ 0.99999994, -0.6173796 , -0.9685411 ], + [-0.6173796 , 1. , 0.7937219 ], + [-0.9685411 , 0.7937219 , 0.9999999 ]], dtype=float32) + + If we add another set of variables and observations ``yarr``, we can + compute the row-wise Pearson correlation coefficients between the + variables in ``xarr`` and ``yarr``. + + >>> yarr = np.random.rand(3, 3).astype(np.float32) + >>> yarr + array([[0.17615308, 0.65354985, 0.15716429], + [0.09373496, 0.2123185 , 0.84086883], + [0.9011005 , 0.45206687, 0.00225109]], dtype=float32) + >>> R2 = np.corrcoef(xarr, yarr) + >>> R2 + array([[ 0.99999994, -0.6173796 , -0.968541 , -0.48613155, 0.9951523 , + -0.8900264 ], + [-0.6173796 , 1. , 0.7937219 , 0.9875833 , -0.53702235, + 0.19083664], + [-0.968541 , 0.7937219 , 0.9999999 , 0.6883078 , -0.9393724 , + 0.74857277], + [-0.48613152, 0.9875833 , 0.6883078 , 0.9999999 , -0.39783284, + 0.0342579 ], + [ 0.9951523 , -0.53702235, -0.9393725 , -0.39783284, 0.99999994, + -0.9305482 ], + [-0.89002645, 0.19083665, 0.7485727 , 0.0342579 , -0.9305482 , + 1. ]], dtype=float32) + + Finally if we use the option ``rowvar=False``, the columns are now + being treated as the variables and we will find the column-wise Pearson + correlation coefficients between variables in ``xarr`` and ``yarr``. + + >>> R3 = np.corrcoef(xarr, yarr, rowvar=False) + >>> R3 + array([[ 1. , 0.9724453 , -0.9909503 , 0.8104691 , -0.46436927, + -0.1643624 ], + [ 0.9724453 , 1. , -0.9949381 , 0.6515728 , -0.6580445 , + 0.07012729], + [-0.99095035, -0.994938 , 1. , -0.72450536, 0.5790461 , + 0.03047091], + [ 0.8104691 , 0.65157276, -0.72450536, 1. , 0.14243561, + -0.71102554], + [-0.4643693 , -0.6580445 , 0.57904613, 0.1424356 , 0.99999994, + -0.79727215], + [-0.1643624 , 0.07012729, 0.03047091, -0.7110255 , -0.7972722 , + 0.99999994]], dtype=float32) + """ + + out = dpnp.cov(x, y, rowvar, dtype=dtype) + if out.ndim == 0: + # scalar covariance + # nan if incorrect value (nan, inf, 0), 1 otherwise + return out / out + + d = dpnp.diag(out) + + stddev = dpnp.sqrt(d.real) + out /= stddev[:, None] + out /= stddev[None, :] + + # Clip real and imaginary parts to [-1, 1]. This does not guarantee + # abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without + # excessive work. + dpnp.clip(out.real, -1, 1, out=out.real) + if dpnp.iscomplexobj(out): + dpnp.clip(out.imag, -1, 1, out=out.imag) + + return out + + def correlate(x1, x2, mode="valid"): """ Cross-correlation of two 1-dimensional sequences. diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index d0ee8bb0b617..db1beb0c6fc9 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -310,11 +310,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2 -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y - tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 82e7c2abee13..797f1ecf86c3 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -320,11 +320,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2 -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar -tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y - tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher] diff --git a/tests/test_statistics.py b/tests/test_statistics.py index 3baccad541b3..b0567216825a 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -573,6 +573,61 @@ def test_std_error(self): dpnp.std(ia, ddof="1") +class TestCorrcoef: + @pytest.mark.usefixtures( + "suppress_divide_invalid_numpy_warnings", + "suppress_dof_numpy_warnings", + ) + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("rowvar", [True, False]) + def test_corrcoef(self, dtype, rowvar): + dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.corrcoef(np_array, rowvar=rowvar) + result = dpnp.corrcoef(dp_array, rowvar=rowvar) + + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures( + "suppress_divide_invalid_numpy_warnings", + "suppress_dof_numpy_warnings", + "suppress_mean_empty_slice_numpy_warnings", + ) + @pytest.mark.parametrize("shape", [(2, 0), (0, 2)]) + def test_corrcoef_empty(self, shape): + dp_array = dpnp.empty(shape, dtype=dpnp.int64) + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.corrcoef(dp_array) + expected = numpy.corrcoef(np_array) + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) + def test_corrcoef_dtype(self, dt_in, dt_out): + dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.corrcoef(np_array, dtype=dt_out) + result = dpnp.corrcoef(dp_array, dtype=dt_out) + assert expected.dtype == result.dtype + assert_allclose(result, expected, rtol=1e-6) + + @pytest.mark.usefixtures( + "suppress_divide_invalid_numpy_warnings", + "suppress_dof_numpy_warnings", + ) + def test_corrcoef_scalar(self): + dp_array = dpnp.array(5) + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.corrcoef(dp_array) + expected = numpy.corrcoef(np_array) + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestBincount: @pytest.mark.parametrize( diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index a4e969f68430..604a60446c78 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -442,6 +442,7 @@ def test_meshgrid(device): pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]), pytest.param("copy", [1.0, 2.0, 3.0]), + pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), pytest.param( "cos", [-dpnp.pi / 2, -dpnp.pi / 4, 0.0, dpnp.pi / 4, dpnp.pi / 2] ), @@ -693,6 +694,11 @@ def test_reduce_hypot(device): pytest.param("append", [1, 2, 3], [4, 5, 6]), pytest.param("arctan2", [-1, +1, +1, -1], [-1, -1, +1, +1]), pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]), + pytest.param( + "corrcoef", + [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], + [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], + ), pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]), pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]), pytest.param( diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index aa76443dad40..4aa74182ce77 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -576,6 +576,7 @@ def test_norm(usm_type, ord, axis): pytest.param("cbrt", [1, 8, 27]), pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]), + pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), pytest.param( "cos", [-dp.pi / 2, -dp.pi / 4, 0.0, dp.pi / 4, dp.pi / 2] ), @@ -685,6 +686,11 @@ def test_1in_1out(func, data, usm_type): pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]), pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]), pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]), + pytest.param( + "corrcoef", + [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], + [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], + ), # dpnp.dot has 3 different implementations based on input arrays dtype # checking all of them pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py index be6b815629ff..2a4cef5f8c55 100644 --- a/tests/third_party/cupy/statistics_tests/test_correlation.py +++ b/tests/third_party/cupy/statistics_tests/test_correlation.py @@ -12,26 +12,26 @@ class TestCorrcoef(unittest.TestCase): @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_corrcoef(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) return xp.corrcoef(a) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_corrcoef_diag_exception(self, xp, dtype): a = testing.shaped_arange((1, 3), xp, dtype) return xp.corrcoef(a) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_corrcoef_y(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) y = testing.shaped_arange((2, 3), xp, dtype) return xp.corrcoef(a, y=y) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_corrcoef_rowvar(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) y = testing.shaped_arange((2, 3), xp, dtype)