diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 857f576b1c42..e462dc806ea0 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -37,6 +37,8 @@ """ +import math + import dpctl.tensor as dpt import dpctl.tensor._tensor_elementwise_impl as ti import dpctl.utils as dpu @@ -481,7 +483,7 @@ def _get_padding(a_size, v_size, mode): r_pad = v_size - l_pad - 1 elif mode == "full": l_pad, r_pad = v_size - 1, v_size - 1 - else: + else: # pragma: no cover raise ValueError( f"Unknown mode: {mode}. Only 'valid', 'same', 'full' are supported." ) @@ -489,16 +491,58 @@ def _get_padding(a_size, v_size, mode): return l_pad, r_pad -def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad): +def _choose_conv_method(a, v, rdtype): + assert a.size >= v.size + if rdtype == dpnp.bool: + # to avoid accuracy issues + return "direct" + + if v.size < 10**4 or a.size < 10**4: + # direct method is faster for small arrays + return "direct" + + if dpnp.issubdtype(rdtype, dpnp.integer): + max_a = int(dpnp.max(dpnp.abs(a))) + sum_v = int(dpnp.sum(dpnp.abs(v))) + max_value = int(max_a * sum_v) + + default_float = dpnp.default_float_type(a.sycl_device) + if max_value > 2 ** numpy.finfo(default_float).nmant - 1: + # can't represent the result in the default float type + return "direct" # pragma: no covers + + if dpnp.issubdtype(rdtype, dpnp.number): + return "fft" + + raise ValueError(f"Unsupported dtype: {rdtype}") # pragma: no cover + + +def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad, rdtype): queue = a.sycl_queue + device = a.sycl_device + + supported_types = statistics_ext.sliding_dot_product1d_dtypes() + supported_dtype = to_supported_dtypes(rdtype, supported_types, device) + + if supported_dtype is None: # pragma: no cover + raise ValueError( + f"function does not support input types " + f"({a.dtype.name}, {v.dtype.name}), " + "and the inputs could not be coerced to any " + f"supported types. List of supported types: " + f"{[st.name for st in supported_types]}" + ) + + a_casted = dpnp.asarray(a, dtype=supported_dtype, order="C") + v_casted = dpnp.asarray(v, dtype=supported_dtype, order="C") - usm_type = dpu.get_coerced_usm_type([a.usm_type, v.usm_type]) - out_size = l_pad + r_pad + a.size - v.size + 1 + usm_type = dpu.get_coerced_usm_type([a_casted.usm_type, v_casted.usm_type]) + out_size = l_pad + r_pad + a_casted.size - v_casted.size + 1 # out type is the same as input type - out = dpnp.empty_like(a, shape=out_size, usm_type=usm_type) + out = dpnp.empty_like(a_casted, shape=out_size, usm_type=usm_type) - a_usm = dpnp.get_usm_ndarray(a) - v_usm = dpnp.get_usm_ndarray(v) + a_usm = dpnp.get_usm_ndarray(a_casted) + v_usm = dpnp.get_usm_ndarray(v_casted) out_usm = dpnp.get_usm_ndarray(out) _manager = dpu.SequentialOrderManager[queue] @@ -516,7 +560,30 @@ def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad): return out -def correlate(a, v, mode="valid"): +def _convolve_fft(a, v, l_pad, r_pad, rtype): + assert a.size >= v.size + assert l_pad < v.size + + # +1 is needed to avoid circular convolution + padded_size = a.size + r_pad + 1 + fft_size = 2 ** int(math.ceil(math.log2(padded_size))) + + af = dpnp.fft.fft(a, fft_size) # pylint: disable=no-member + vf = dpnp.fft.fft(v, fft_size) # pylint: disable=no-member + + r = dpnp.fft.ifft(af * vf) # pylint: disable=no-member + if dpnp.issubdtype(rtype, dpnp.floating): + r = r.real + elif dpnp.issubdtype(rtype, dpnp.integer) or rtype == dpnp.bool: + r = r.real.round() + + start = v.size - 1 - l_pad + end = padded_size - 1 + + return r[start:end] + + +def correlate(a, v, mode="valid", method="auto"): r""" Cross-correlation of two 1-dimensional sequences. @@ -541,6 +608,20 @@ def correlate(a, v, mode="valid"): is ``"valid"``, unlike :obj:`dpnp.convolve`, which uses ``"full"``. Default: ``"valid"``. + method : {"auto", "direct", "fft"}, optional + Specifies which method to use to calculate the correlation: + + - `"direct"` : The correlation is determined directly from sums. + - `"fft"` : The Fourier Transform is used to perform the calculations. + This method is faster for long sequences but can have accuracy issues. + - `"auto"` : Automatically chooses direct or Fourier method based on + an estimate of which is faster. + + Note: Use of the FFT convolution on input containing NAN or INF + will lead to the entire output being NAN or INF. + Use method='direct' when your input contains NAN or INF values. + + Default: ``"auto"``. Returns ------- @@ -608,20 +689,14 @@ def correlate(a, v, mode="valid"): f"Received shapes: a.shape={a.shape}, v.shape={v.shape}" ) - supported_types = statistics_ext.sliding_dot_product1d_dtypes() + supported_methods = ["auto", "direct", "fft"] + if method not in supported_methods: + raise ValueError( + f"Unknown method: {method}. Supported methods: {supported_methods}" + ) device = a.sycl_device rdtype = result_type_for_device([a.dtype, v.dtype], device) - supported_dtype = to_supported_dtypes(rdtype, supported_types, device) - - if supported_dtype is None: # pragma: no cover - raise ValueError( - f"function does not support input types " - f"({a.dtype.name}, {v.dtype.name}), " - "and the inputs could not be coerced to any " - f"supported types. List of supported types: " - f"{[st.name for st in supported_types]}" - ) if dpnp.issubdtype(v.dtype, dpnp.complexfloating): v = dpnp.conj(v) @@ -633,10 +708,15 @@ def correlate(a, v, mode="valid"): l_pad, r_pad = _get_padding(a.size, v.size, mode) - a_casted = dpnp.asarray(a, dtype=supported_dtype, order="C") - v_casted = dpnp.asarray(v, dtype=supported_dtype, order="C") + if method == "auto": + method = _choose_conv_method(a, v, rdtype) - r = _run_native_sliding_dot_product1d(a_casted, v_casted, l_pad, r_pad) + if method == "direct": + r = _run_native_sliding_dot_product1d(a, v, l_pad, r_pad, rdtype) + elif method == "fft": + r = _convolve_fft(a, v[::-1], l_pad, r_pad, rdtype) + else: # pragma: no cover + raise ValueError(f"Unknown method: {method}") if revert: r = r[::-1] diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py index 0e4757375842..be72c570cfc5 100644 --- a/dpnp/tests/helper.py +++ b/dpnp/tests/helper.py @@ -13,6 +13,7 @@ def assert_dtype_allclose( check_type=True, check_only_type_kind=False, factor=8, + relative_factor=None, ): """ Assert DPNP and NumPy array based on maximum dtype resolution of input arrays @@ -183,6 +184,7 @@ def generate_random_numpy_array( seed_value=None, low=-10, high=10, + probability=0.5, ): """ Generate a random numpy array with the specified shape and dtype. @@ -197,23 +199,32 @@ def generate_random_numpy_array( dtype : str or dtype, optional Desired data-type for the output array. If not specified, data type will be determined by numpy. + Default : ``None`` order : {"C", "F"}, optional Specify the memory layout of the output array. + Default: ``"C"``. hermitian : bool, optional If True, generates a Hermitian (symmetric if `dtype` is real) matrix. + Default : ``False`` seed_value : int, optional The seed value to initialize the random number generator. + Default : ``None`` low : {int, float}, optional Lower boundary of the generated samples from a uniform distribution. + Default : ``-10``. high : {int, float}, optional Upper boundary of the generated samples from a uniform distribution. + Default : ``10``. + probability : float, optional + If dtype is bool, the probability of True. Ignored for other dtypes. + Default : ``0.5``. Returns ------- out : numpy.ndarray @@ -232,9 +243,15 @@ def generate_random_numpy_array( # dtype=int is needed for 0d arrays size = numpy.prod(shape, dtype=int) - a = numpy.random.uniform(low, high, size).astype(dtype) - if numpy.issubdtype(a.dtype, numpy.complexfloating): - a += 1j * numpy.random.uniform(low, high, size) + if dtype == dpnp.bool: + a = numpy.random.choice( + [False, True], size, p=[1 - probability, probability] + ) + else: + a = numpy.random.uniform(low, high, size).astype(dtype) + + if numpy.issubdtype(a.dtype, numpy.complexfloating): + a += 1j * numpy.random.uniform(low, high, size) a = a.reshape(shape) if hermitian and a.size > 0: diff --git a/dpnp/tests/test_statistics.py b/dpnp/tests/test_statistics.py index 23317236367d..d5d0e72b030d 100644 --- a/dpnp/tests/test_statistics.py +++ b/dpnp/tests/test_statistics.py @@ -180,26 +180,101 @@ def test_corrcoef_scalar(self): class TestCorrelate: + @staticmethod + def _get_kwargs(mode=None, method=None): + dpnp_kwargs = {} + numpy_kwargs = {} + if mode is not None: + dpnp_kwargs["mode"] = mode + numpy_kwargs["mode"] = mode + if method is not None: + dpnp_kwargs["method"] = method + return dpnp_kwargs, numpy_kwargs + + def setup_method(self): + numpy.random.seed(0) + @pytest.mark.parametrize( "a, v", [([1], [1, 2, 3]), ([1, 2, 3], [1]), ([1, 2, 3], [1, 2])] ) @pytest.mark.parametrize("mode", [None, "full", "valid", "same"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) - def test_correlate(self, a, v, mode, dtype): + @pytest.mark.parametrize("method", [None, "auto", "direct", "fft"]) + def test_correlate(self, a, v, mode, dtype, method): an = numpy.array(a, dtype=dtype) vn = numpy.array(v, dtype=dtype) ad = dpnp.array(an) vd = dpnp.array(vn) - if mode is None: - expected = numpy.correlate(an, vn) - result = dpnp.correlate(ad, vd) - else: - expected = numpy.correlate(an, vn, mode=mode) - result = dpnp.correlate(ad, vd, mode=mode) + dpnp_kwargs, numpy_kwargs = self._get_kwargs(mode, method) + + expected = numpy.correlate(an, vn, **numpy_kwargs) + result = dpnp.correlate(ad, vd, **dpnp_kwargs) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("a_size", [1, 100, 10000]) + @pytest.mark.parametrize("v_size", [1, 100, 10000]) + @pytest.mark.parametrize("mode", ["full", "valid", "same"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize("method", ["auto", "direct", "fft"]) + def test_correlate_random(self, a_size, v_size, mode, dtype, method): + an = generate_random_numpy_array(a_size, dtype, probability=0.9) + vn = generate_random_numpy_array(v_size, dtype, probability=0.9) + + ad = dpnp.array(an) + vd = dpnp.array(vn) + + dpnp_kwargs, numpy_kwargs = self._get_kwargs(mode, method) + + result = dpnp.correlate(ad, vd, **dpnp_kwargs) + expected = numpy.correlate(an, vn, **numpy_kwargs) + + rdtype = result.dtype + if dpnp.issubdtype(rdtype, dpnp.integer): + rdtype = dpnp.default_float_type(ad.device) + + if method != "fft" and ( + dpnp.issubdtype(dtype, dpnp.integer) or dtype == dpnp.bool + ): + # For 'direct' and 'auto' methods, we expect exact results for integer types + assert_array_equal(result, expected) + else: + result = result.astype(rdtype) + if method == "direct": + expected = numpy.correlate(an, vn, **numpy_kwargs) + # For 'direct' method we can use standard validation + # acceptable error depends on the kernel size + # while error grows linearly with the kernel size, + # this empirically found formula provides a good balance + # the resulting factor is 40 for kernel size = 1, + # 400 for kernel size = 100 and 4000 for kernel size = 10000 + factor = int(40 * (min(a_size, v_size) ** 0.5)) + assert_dtype_allclose(result, expected, factor=factor) + else: + rtol = 1e-3 + atol = 1e-3 + + if rdtype == dpnp.float64 or rdtype == dpnp.complex128: + rtol = 1e-6 + atol = 1e-6 + elif rdtype == dpnp.bool: + result = result.astype(dpnp.int32) + rdtype = result.dtype + + expected = expected.astype(rdtype) + + diff = numpy.abs(result.asnumpy() - expected) + invalid = diff > atol + rtol * numpy.abs(expected) + + # When using the 'fft' method, we might encounter outliers. + # This usually happens when the resulting array contains values close to zero. + # For these outliers, the relative error can be significant. + # We can tolerate a few such outliers. + max_outliers = 10 if expected.size > 1 else 0 + if invalid.sum() > max_outliers: + assert_dtype_allclose(result, expected, factor=1000) + def test_correlate_mode_error(self): a = dpnp.arange(5) v = dpnp.arange(3) @@ -240,7 +315,7 @@ def test_correlate_different_sizes(self, size): vd = dpnp.array(v) expected = numpy.correlate(a, v) - result = dpnp.correlate(ad, vd) + result = dpnp.correlate(ad, vd, method="direct") assert_dtype_allclose(result, expected, factor=20) @@ -251,6 +326,13 @@ def test_correlate_another_sycl_queue(self): with pytest.raises(ValueError): dpnp.correlate(a, v) + def test_correlate_unkown_method(self): + a = dpnp.arange(5) + v = dpnp.arange(3) + + with pytest.raises(ValueError): + dpnp.correlate(a, v, method="unknown") + class TestCov: @pytest.mark.parametrize(