Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions doc/sources/algorithms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Classification
all parameters except ``metric`` != `'euclidean'` or `'minkowski'` with ``p`` != `2`
- For ``algorithm`` == `'brute'`:

all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`]
all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`]
- Multi-output and sparse data are not supported
* - :obj:`sklearn.linear_model.LogisticRegression`
- All parameters are supported
Expand Down Expand Up @@ -204,7 +204,7 @@ Nearest Neighbors
all parameters except ``metric`` != `'euclidean'` or `'minkowski'` with ``p`` != `2`
- For ``algorithm`` == `'brute'`:

all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`]
all parameters except ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`]
- Sparse data is not supported

Other Tasks
Expand Down Expand Up @@ -302,7 +302,7 @@ Classification

- ``algorithm`` != `'brute'`
- ``weights`` = `'callable'`
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`]
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`]
- Only dense data is supported
* - :obj:`sklearn.linear_model.LogisticRegression`
- All parameters are supported except:
Expand Down Expand Up @@ -421,7 +421,7 @@ Nearest Neighbors

- ``algorithm`` != `'brute'`
- ``weights`` = `'callable'`
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`]
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`]
- Only dense data is supported

Other Tasks
Expand Down Expand Up @@ -487,7 +487,7 @@ Classification

- ``algorithm`` != `'brute'`
- ``weights`` = `'callable'`
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`]
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`]
- ``predict_proba`` method not supported
- Only dense data is supported
* - :obj:`sklearn.linear_model.LogisticRegression`
Expand Down Expand Up @@ -608,7 +608,7 @@ Nearest Neighbors

- ``algorithm`` != `'brute'`
- ``weights`` = `'callable'`
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`]
- ``metric`` not in [`'euclidean'`, `'manhattan'`, `'minkowski'`, `'chebyshev'`, `'cosine'`, `'correlation'`]
- Only dense data is supported

Other Tasks
Expand Down
6 changes: 6 additions & 0 deletions onedal/neighbors/neighbors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ struct metric2t {
Float,
Method,
cosine_distance::descriptor<Float>);
ONEDAL_PARAM_DISPATCH_VALUE(metric,
"correlation",
ops,
Float,
Method,
correlation_distance::descriptor<Float>);
ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(metric);
}

Expand Down
3 changes: 3 additions & 0 deletions onedal/primitives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from .get_tree import get_tree_state_cls, get_tree_state_reg
from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel
from .pairwise_distances import correlation_distance, cosine_distance

__all__ = [
"get_tree_state_cls",
Expand All @@ -24,4 +25,6 @@
"rbf_kernel",
"poly_kernel",
"sigmoid_kernel",
"correlation_distance",
"cosine_distance",
]
46 changes: 46 additions & 0 deletions onedal/primitives/correlation_distance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*******************************************************************************
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include "onedal/primitives/pairwise_distances.hpp"

namespace py = pybind11;

namespace oneapi::dal::python {

// Declare the instantiators used below for the shared distance binding
// helpers (init_distance_result / init_distance_compute_ops).
// NOTE(review): the macro expansion is defined outside this file — confirm
// what exactly it declares.
ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_result);
ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_compute_ops);

// Python bindings for the correlation distance primitive.
// Creates a "correlation_distance" submodule on `m` (presumably the parent
// module supplied by ONEDAL_PY_INIT_MODULE — TODO confirm) and, unless the
// build defines ONEDAL_DATA_PARALLEL_SPMD, instantiates the result binding
// and the compute binding for the dense method.
ONEDAL_PY_INIT_MODULE(correlation_distance) {
using namespace dal::detail;
using namespace correlation_distance;
// Type aliases handed to the instantiation macros below.
using input_t = compute_input<task::compute>;
using result_t = compute_result<task::compute>;
using param2desc_t = distance_params2desc<descriptor>;

// The submodule is always created; only its contents are conditional.
auto sub = m.def_submodule("correlation_distance");
#ifndef ONEDAL_DATA_PARALLEL_SPMD
// Nothing is registered on the submodule when ONEDAL_DATA_PARALLEL_SPMD is defined.
ONEDAL_PY_INSTANTIATE(init_distance_result, sub, result_t);
ONEDAL_PY_INSTANTIATE(init_distance_compute_ops,
sub,
policy_list,
input_t,
result_t,
param2desc_t,
method::dense);
#endif
}

} // namespace oneapi::dal::python
46 changes: 46 additions & 0 deletions onedal/primitives/cosine_distance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*******************************************************************************
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include "onedal/primitives/pairwise_distances.hpp"

namespace py = pybind11;

namespace oneapi::dal::python {

// Declare the instantiators used below for the shared distance binding
// helpers (init_distance_result / init_distance_compute_ops).
// NOTE(review): the macro expansion is defined outside this file — confirm
// what exactly it declares.
ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_result);
ONEDAL_PY_DECLARE_INSTANTIATOR(init_distance_compute_ops);

// Python bindings for the cosine distance primitive.
// Creates a "cosine_distance" submodule on `m` (presumably the parent module
// supplied by ONEDAL_PY_INIT_MODULE — TODO confirm) and, unless the build
// defines ONEDAL_DATA_PARALLEL_SPMD, instantiates the result binding and the
// compute binding for the dense method.
ONEDAL_PY_INIT_MODULE(cosine_distance) {
using namespace dal::detail;
using namespace cosine_distance;
// Type aliases handed to the instantiation macros below.
using input_t = compute_input<task::compute>;
using result_t = compute_result<task::compute>;
using param2desc_t = distance_params2desc<descriptor>;

// The submodule is always created; only its contents are conditional.
auto sub = m.def_submodule("cosine_distance");
#ifndef ONEDAL_DATA_PARALLEL_SPMD
// Nothing is registered on the submodule when ONEDAL_DATA_PARALLEL_SPMD is defined.
ONEDAL_PY_INSTANTIATE(init_distance_result, sub, result_t);
ONEDAL_PY_INSTANTIATE(init_distance_compute_ops,
sub,
policy_list,
input_t,
result_t,
param2desc_t,
method::dense);
#endif
}

} // namespace oneapi::dal::python
23 changes: 23 additions & 0 deletions onedal/primitives/pairwise_distances.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <pybind11/pybind11.h>

#include "oneapi/dal/algo/chebyshev_distance/common.hpp"
#include "oneapi/dal/algo/correlation_distance/common.hpp"
#include "oneapi/dal/algo/cosine_distance/common.hpp"
#include "oneapi/dal/algo/minkowski_distance/common.hpp"

Expand All @@ -32,6 +33,8 @@ auto get_distance_descriptor(const pybind11::dict& params) {
using method_t = typename Distance::method_t;
using task_t = typename Distance::task_t;
using minkowski_desc_t = minkowski_distance::descriptor<float_t, method_t, task_t>;
using correlation_distance_desc_t = correlation_distance::descriptor<float_t, method_t, task_t>;
using cosine_distance_desc_t = cosine_distance::descriptor<float_t, method_t, task_t>;

auto distance = Distance{};
if constexpr (std::is_same_v<Distance, minkowski_desc_t>) {
Expand Down Expand Up @@ -63,4 +66,24 @@ struct distance_params2desc {
}
};

/// Registers a module-level `compute(policy, params, x, y)` function on `m`.
///
/// The bound callable builds `compute_ops` from the policy, an `Input`
/// constructed from the two tables and a default-constructed `Param2Desc`,
/// then dispatches through `distance_method2t` (with `DenseMethod`) and
/// `fptype2t`, invoking the result with the `params` dict.
///
/// @tparam Policy      Policy type accepted as the first Python argument.
/// @tparam Input       Input type, brace-constructed from the two tables.
/// @tparam Result      Result type; not referenced in this function body.
/// @tparam Param2Desc  Default-constructed and passed to `compute_ops`.
/// @tparam DenseMethod Method tag, default-constructed for method dispatch.
/// @param m            Module on which `compute` is defined.
template <typename Policy,
          typename Input,
          typename Result,
          typename Param2Desc,
          typename DenseMethod>
inline void init_distance_compute_ops(pybind11::module_& m) {
    auto run_compute = [](const Policy& exec_policy,
                          const pybind11::dict& py_params,
                          const table& lhs,
                          const table& rhs) {
        compute_ops ops(exec_policy, Input{ lhs, rhs }, Param2Desc{});
        return fptype2t{ distance_method2t{ DenseMethod{}, ops } }(py_params);
    };
    m.def("compute", run_compute);
}

/// Exposes `Result` to Python as a class named "result" on module `m`.
///
/// The class gets a no-argument constructor and a read/write `values`
/// property backed by `Result::get_values` / `Result::set_values`.
///
/// @tparam Result Result type to bind.
/// @param m       Module that receives the class.
template <typename Result>
inline void init_distance_result(pybind11::module_& m) {
    pybind11::class_<Result> result_binding(m, "result");
    result_binding.def(pybind11::init());
    result_binding.def_property("values", &Result::get_values, &Result::set_values);
}

} // namespace oneapi::dal::python
112 changes: 112 additions & 0 deletions onedal/primitives/pairwise_distances.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# ===============================================================================
# Copyright 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================

import numpy as np

from onedal import _default_backend as backend
from onedal._device_offload import supports_queue
from onedal.common._backend import BackendFunction
from onedal.utils import _sycl_queue_manager as QM

from ..datatypes import from_table, to_table
from ..utils.validation import _check_array


def _check_inputs(X, Y):
    """Validate the two operands of a pairwise-distance computation.

    Each array goes through ``_check_array`` with a float64/float32 dtype
    request and ``force_all_finite=False``. When ``Y`` is ``None`` the
    validated ``X`` is reused as the second operand.

    Returns
    -------
    tuple
        ``(X, Y)`` after validation.
    """
    X = _check_array(X, dtype=[np.float64, np.float32], force_all_finite=False)
    if Y is None:
        return X, X
    Y = _check_array(Y, dtype=[np.float64, np.float32], force_all_finite=False)
    return X, Y


def _compute_distance(params, submodule, X, Y):
    """Run a backend pairwise-distance ``compute`` call and return its values.

    Parameters
    ----------
    params : dict
        Parameters forwarded to the backend. The dict is not modified; an
        ``"fptype"`` entry is added to a copy.

    submodule : module
        Backend submodule whose ``compute`` function is invoked.

    X, Y : array-like
        Operands, converted to tables before the call.

    Returns
    -------
    distances
        ``result.values`` of the backend call, converted with ``from_table``.
    """
    # Tables are created against the currently active global queue.
    queue = QM.get_global_queue()
    X, Y = to_table(X, Y, queue=queue)

    # Copy before adding "fptype" so the caller's dict is left untouched.
    backend_params = dict(params)
    backend_params["fptype"] = X.dtype

    compute_method = BackendFunction(
        submodule.compute, backend, "compute", no_policy=False
    )
    result = compute_method(backend_params, X, Y)
    return from_table(result.values)


@supports_queue
def correlation_distances(X, Y=None, queue=None):
    """Compute pairwise correlation distances between the rows of X and Y.

    For a row x of X and a row y of Y::

        D(x, y) = 1 - correlation_coefficient(x, y)

    with::

        correlation_coefficient(x, y) =
            sum((x - mean(x)) * (y - mean(y))) / (std(x) * std(y) * n)

    Parameters
    ----------
    X : ndarray of shape (n_samples_X, n_features)
        A feature array.

    Y : ndarray of shape (n_samples_Y, n_features), default=None
        An optional second feature array. If `None`, uses `Y=X`.

    queue : SyclQueue or None, default=None
        SYCL Queue object for device code execution. Default
        value None causes computation on host. It is not read in this
        function body; presumably the ``supports_queue`` decorator consumes it.

    Returns
    -------
    distances : ndarray of shape (n_samples_X, n_samples_Y)
        The correlation distances.
    """
    lhs, rhs = _check_inputs(X, Y)
    dense_params = {"method": "dense"}
    return _compute_distance(dense_params, backend.correlation_distance, lhs, rhs)


@supports_queue
def cosine_distances(X, Y=None, queue=None):
    """Compute pairwise cosine distances between the rows of X and Y.

    For a row x of X and a row y of Y::

        D(x, y) = 1 - (x · y) / (||x|| * ||y||)

    Parameters
    ----------
    X : ndarray of shape (n_samples_X, n_features)
        A feature array.

    Y : ndarray of shape (n_samples_Y, n_features), default=None
        An optional second feature array. If `None`, uses `Y=X`.

    queue : SyclQueue or None, default=None
        SYCL Queue object for device code execution. Default
        value None causes computation on host. It is not read in this
        function body; presumably the ``supports_queue`` decorator consumes it.

    Returns
    -------
    distances : ndarray of shape (n_samples_X, n_samples_Y)
        The cosine distances.
    """
    lhs, rhs = _check_inputs(X, Y)
    dense_params = {"method": "dense"}
    return _compute_distance(dense_params, backend.cosine_distance, lhs, rhs)
1 change: 1 addition & 0 deletions sklearnex/neighbors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ def _onedal_supported(self, device, method_name, *data):
"euclidean",
"chebyshev",
"cosine",
"correlation",
]
onedal_kdtree_metrics = ["euclidean"]
is_valid_for_brute = (
Expand Down
2 changes: 1 addition & 1 deletion sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def test_knnreg_spmd_gold(dataframe, queue):
@pytest.mark.parametrize("n_neighbors", [1, 5, 20])
@pytest.mark.parametrize("weights", ["uniform", "distance"])
@pytest.mark.parametrize(
"metric", ["euclidean", "manhattan", "minkowski", "chebyshev", "cosine"]
"metric", ["euclidean", "manhattan", "minkowski", "chebyshev", "cosine", "correlation"]
)
@pytest.mark.parametrize(
"dataframe,queue",
Expand Down
Loading