diff --git a/doc/sources/algorithms.rst b/doc/sources/algorithms.rst index c2038e4fcb..70f397f5ce 100755 --- a/doc/sources/algorithms.rst +++ b/doc/sources/algorithms.rst @@ -66,7 +66,7 @@ Classification all parameters except ``metric`` != `'euclidean'` or `'minkowski'` with ``p`` != `2` - For ``algorithm`` == `'brute'`: - all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] + all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`] - Multi-output and sparse data are not supported * - :obj:`sklearn.linear_model.LogisticRegression` - All parameters are supported @@ -204,7 +204,7 @@ Nearest Neighbors all parameters except ``metric`` != `'euclidean'` or `'minkowski'` with ``p`` != `2` - For ``algorithm`` == 'brute': - all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] + all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`] - Sparse data is not supported Other Tasks @@ -302,7 +302,7 @@ Classification - ``algorithm`` != `'brute'` - ``weights`` = `'callable'` - - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] + - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`] - Only dense data is supported * - :obj:`sklearn.linear_model.LogisticRegression` - All parameters are supported except: @@ -421,7 +421,7 @@ Nearest Neighbors - ``algorithm`` != `'brute'` - ``weights`` = `'callable'` - - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] + - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`] - Only dense data is supported Other Tasks @@ -487,7 +487,7 @@ Classification - ``algorithm`` != `'brute'` - ``weights`` = 
`'callable'` - - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] + - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`] - ``predict_proba`` method not supported - Only dense data is supported * - :obj:`sklearn.linear_model.LogisticRegression` @@ -608,7 +608,7 @@ Nearest Neighbors - ``algorithm`` != `'brute'` - ``weights`` = `'callable'` - - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`] + - ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`] - Only dense data is supported Other Tasks diff --git a/onedal/neighbors/neighbors.cpp b/onedal/neighbors/neighbors.cpp index fe458fc0b5..85238f3991 100644 --- a/onedal/neighbors/neighbors.cpp +++ b/onedal/neighbors/neighbors.cpp @@ -83,6 +83,12 @@ struct metric2t { Float, Method, cosine_distance::descriptor); + ONEDAL_PARAM_DISPATCH_VALUE(metric, + "correlation", + ops, + Float, + Method, + correlation_distance::descriptor); ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(metric); } diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index 39213819b5..c5bbf359af 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -16,6 +16,7 @@ from .get_tree import get_tree_state_cls, get_tree_state_reg from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel +from .pairwise_distances import correlation_distances, cosine_distances __all__ = [ "get_tree_state_cls", @@ -24,4 +25,6 @@ "rbf_kernel", "poly_kernel", "sigmoid_kernel", + "correlation_distances", + "cosine_distances", ] diff --git a/onedal/primitives/correlation_distance.cpp b/onedal/primitives/correlation_distance.cpp new file mode 100644 index 0000000000..ffb4fc520f --- /dev/null +++ b/onedal/primitives/correlation_distance.cpp @@ -0,0 +1,46 @@ 
+/******************************************************************************* +* Copyright 2025 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "onedal/primitives/pairwise_distances.hpp" + +namespace py = pybind11; + +namespace oneapi::dal::python { + +ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_result); +ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_compute_ops); + +ONEDAL_PY_INIT_MODULE(correlation_distance) { + using namespace dal::detail; + using namespace correlation_distance; + using input_t = compute_input; + using result_t = compute_result; + using param2desc_t = distance_params2desc; + + auto sub = m.def_submodule("correlation_distance"); +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_distance_result, sub, result_t); + ONEDAL_PY_INSTANTIATE(init_distance_compute_ops, + sub, + policy_list, + input_t, + result_t, + param2desc_t, + method::dense); +#endif +} + +} // namespace oneapi::dal::python diff --git a/onedal/primitives/cosine_distance.cpp b/onedal/primitives/cosine_distance.cpp new file mode 100644 index 0000000000..80f9245fda --- /dev/null +++ b/onedal/primitives/cosine_distance.cpp @@ -0,0 +1,46 @@ +/******************************************************************************* +* Copyright 2025 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in 
compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "onedal/primitives/pairwise_distances.hpp" + +namespace py = pybind11; + +namespace oneapi::dal::python { + +ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_result); +ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_compute_ops); + +ONEDAL_PY_INIT_MODULE(cosine_distance) { + using namespace dal::detail; + using namespace cosine_distance; + using input_t = compute_input; + using result_t = compute_result; + using param2desc_t = distance_params2desc; + + auto sub = m.def_submodule("cosine_distance"); +#ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_distance_result, sub, result_t); + ONEDAL_PY_INSTANTIATE(init_distance_compute_ops, + sub, + policy_list, + input_t, + result_t, + param2desc_t, + method::dense); +#endif +} + +} // namespace oneapi::dal::python diff --git a/onedal/primitives/pairwise_distances.hpp b/onedal/primitives/pairwise_distances.hpp index c94786f63a..d0095e5ca7 100755 --- a/onedal/primitives/pairwise_distances.hpp +++ b/onedal/primitives/pairwise_distances.hpp @@ -19,6 +19,7 @@ #include #include "oneapi/dal/algo/chebyshev_distance/common.hpp" +#include "oneapi/dal/algo/correlation_distance/common.hpp" #include "oneapi/dal/algo/cosine_distance/common.hpp" #include "oneapi/dal/algo/minkowski_distance/common.hpp" @@ -32,6 +33,8 @@ auto get_distance_descriptor(const pybind11::dict& params) { using method_t = typename Distance::method_t; using task_t = typename Distance::task_t; using minkowski_desc_t = 
minkowski_distance::descriptor; + using correlation_distance_desc_t = correlation_distance::descriptor; + using cosine_distance_desc_t = cosine_distance::descriptor; auto distance = Distance{}; if constexpr (std::is_same_v) { @@ -63,4 +66,24 @@ struct distance_params2desc { } }; +template +inline void init_distance_compute_ops(pybind11::module_& m) { + m.def("compute", + [](const Policy& policy, const pybind11::dict& params, const table& x, const table& y) { + compute_ops ops(policy, Input{ x, y }, Param2Desc{}); + return fptype2t{ distance_method2t{ DenseMethod{}, ops } }(params); + }); +} + +template +inline void init_distance_result(pybind11::module_& m) { + pybind11::class_(m, "result") + .def(pybind11::init()) + .def_property("values", &Result::get_values, &Result::set_values); +} + } // namespace oneapi::dal::python diff --git a/onedal/primitives/pairwise_distances.py b/onedal/primitives/pairwise_distances.py new file mode 100644 index 0000000000..46903bb55d --- /dev/null +++ b/onedal/primitives/pairwise_distances.py @@ -0,0 +1,112 @@ +# =============================================================================== +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================== + +import numpy as np + +from onedal import _default_backend as backend +from onedal._device_offload import supports_queue +from onedal.common._backend import BackendFunction +from onedal.utils import _sycl_queue_manager as QM + +from ..datatypes import from_table, to_table +from ..utils.validation import _check_array + + +def _check_inputs(X, Y): + def check_input(data): + return _check_array(data, dtype=[np.float64, np.float32], force_all_finite=False) + + X = check_input(X) + Y = X if Y is None else check_input(Y) + return X, Y + + +def _compute_distance(params, submodule, X, Y): + # get policy for direct backend calls + + queue = QM.get_global_queue() + X, Y = to_table(X, Y, queue=queue) + params["fptype"] = X.dtype + compute_method = BackendFunction( + submodule.compute, backend, "compute", no_policy=False + ) + result = compute_method(params, X, Y) + return from_table(result.values) + + +@supports_queue +def correlation_distances(X, Y=None, queue=None): + """Compute the correlation distances between X and Y. + + D(x, y) = 1 - correlation_coefficient(x, y) + + where correlation_coefficient(x, y) = + sum((x - mean(x)) * (y - mean(y))) / (std(x) * std(y) * n) + + for each pair of rows x in X and y in Y. + + Parameters + ---------- + X : ndarray of shape (n_samples_X, n_features) + A feature array. + + Y : ndarray of shape (n_samples_Y, n_features) + An optional second feature array. If `None`, uses `Y=X`. + + queue : SyclQueue or None, default=None + SYCL Queue object for device code execution. Default + value None causes computation on host. + + Returns + ------- + distances : ndarray of shape (n_samples_X, n_samples_Y) + The correlation distances. 
+ """ + + X, Y = _check_inputs(X, Y) + return _compute_distance( + {"method": "dense"}, backend.correlation_distance, X, Y + ) + + +@supports_queue +def cosine_distances(X, Y=None, queue=None): + """Compute the cosine distances between X and Y. + + D(x, y) = 1 - (x · y) / (||x|| * ||y||) + for each pair of rows x in X and y in Y. + + Parameters + ---------- + X : ndarray of shape (n_samples_X, n_features) + A feature array. + + Y : ndarray of shape (n_samples_Y, n_features) + An optional second feature array. If `None`, uses `Y=X`. + + queue : SyclQueue or None, default=None + SYCL Queue object for device code execution. Default + value None causes computation on host. + + Returns + ------- + distances : ndarray of shape (n_samples_X, n_samples_Y) + The cosine distances. + """ + X, Y = _check_inputs(X, Y) + return _compute_distance( + {"method": "dense"}, backend.cosine_distance, X, Y + ) diff --git a/sklearnex/neighbors/common.py b/sklearnex/neighbors/common.py index b8457fc3b9..793eb8d02b 100644 --- a/sklearnex/neighbors/common.py +++ b/sklearnex/neighbors/common.py @@ -223,6 +223,7 @@ def _onedal_supported(self, device, method_name, *data): "euclidean", "chebyshev", "cosine", + "correlation", ] onedal_kdtree_metrics = ["euclidean"] is_valid_for_brute = ( diff --git a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py index d362cf0bac..a1590f9b2c 100644 --- a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +++ b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py @@ -245,7 +245,7 @@ def test_knnreg_spmd_gold(dataframe, queue): @pytest.mark.parametrize("n_neighbors", [1, 5, 20]) @pytest.mark.parametrize("weights", ["uniform", "distance"]) @pytest.mark.parametrize( - "metric", ["euclidean", "manhattan", "minkowski", "chebyshev", "cosine"] + "metric", ["euclidean", "manhattan", "minkowski", "chebyshev", "cosine", "correlation"] ) @pytest.mark.parametrize( "dataframe,queue",