Skip to content

Commit 7317aef

Browse files
update: update neighbors.py to use pybind11 instead of daal4py (#2284)
* update: update neighbors.py to use pybind11 instead of daal4py * test: revert changes and only change onedal fit for classifier * test: revert everything * fix: add failing validate data test to DESIGN_RULE_VIOLATIONS to avoid refactoring now * fix: format changed files to pass ci * fix: format fix * fix: move validate data to onedal fit call * fix: remove unnecessary changes * fix: remove unnecessary changes * fix: add changes * fix: format * fix: add import * fix: fix check feature names * wip: getting errors about feature names * buggy: when remove functions from _onedal_fit of NearestNeighbors * fix: got rid of all daal4py functions for knn regression, classification and unsupervised, and lof * fix: format * fix: remove some violations from design rule * fix: add some validate data violations to rules * fix: fix the kneighbors calls * fix: remove additional tests * fix: format * fix: try use array api xp * fix: add if check for nearest neighbors * test: test np for knn regression predict * fix: format * fix: format * fix: revert previous * fix: fix validate data * fix: fix score * fix: fix predict * fix: fix score * fix: should only have 1 error now * fix: fix error not raised error * fix: fix predict * fix: test fix * fix: add flag to fit avoid type change * fix: remove ensure finite * fix: format * test: test ensure all finite = false * test: fix * fix: try use explicit convert to numpy * fix: format * fix: fix as numpy * fix: revert changes in dataframe support * fix: try fix as numpy * fix: try fix as numpy * fix: format * fix: try fix as numpy * fix: don't change as numpy * fix: try without as numpy * fix: try don't use xp * fix: try comment out xp again * fix: try comment out xp again * fix: comment out array api import * fix: as numpy in lof * fix: fresh start and try step by step again * fix: just get rid of the daal4py functions * fix: remove check feature names * fix: format * fix: add validate tests to violation array * fix: don't delete check feature 
names * fix: remove daal functions from onedal * fix: format --------- Co-authored-by: Alexander Andreev <[email protected]>
1 parent d3c2679 commit 7317aef

File tree

2 files changed

+54
-130
lines changed

2 files changed

+54
-130
lines changed

onedal/neighbors/neighbors.py

Lines changed: 19 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,6 @@
1919

2020
import numpy as np
2121

22-
from daal4py import (
23-
bf_knn_classification_model,
24-
bf_knn_classification_prediction,
25-
bf_knn_classification_training,
26-
kdtree_knn_classification_model,
27-
kdtree_knn_classification_prediction,
28-
kdtree_knn_classification_training,
29-
)
3022
from onedal._device_offload import supports_queue
3123
from onedal.common._backend import bind_default_backend
3224
from onedal.utils import _sycl_queue_manager as QM
@@ -166,25 +158,6 @@ def _get_onedal_params(self, X, y=None, n_neighbors=None):
166158
"result_option": "indices|distances" if y is None else "responses",
167159
}
168160

169-
def _get_daal_params(self, data, n_neighbors=None):
170-
class_count = 0 if self.classes_ is None else len(self.classes_)
171-
weights = getattr(self, "weights", "uniform")
172-
params = {
173-
"fptype": "float" if data.dtype == np.float32 else "double",
174-
"method": "defaultDense",
175-
"k": self.n_neighbors if n_neighbors is None else n_neighbors,
176-
"voteWeights": "voteUniform" if weights == "uniform" else "voteDistance",
177-
"resultsToCompute": "computeIndicesOfNeighbors|computeDistances",
178-
"resultsToEvaluate": (
179-
"none"
180-
if getattr(self, "_y", None) is None or _is_regressor(self)
181-
else "computeClassLabels"
182-
),
183-
}
184-
if class_count != 0:
185-
params["nClasses"] = class_count
186-
return params
187-
188161

189162
class NeighborsBase(NeighborsCommonBase, metaclass=ABCMeta):
190163
def __init__(
@@ -350,19 +323,10 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
350323
self._fit_method, self.n_samples_fit_, n_features
351324
)
352325

353-
if type(self._onedal_model) in (
354-
kdtree_knn_classification_model,
355-
bf_knn_classification_model,
356-
):
357-
params = super()._get_daal_params(X, n_neighbors=n_neighbors)
358-
prediction_results = self._onedal_predict(self._onedal_model, X, params)
359-
distances = prediction_results.distances
360-
indices = prediction_results.indices
361-
else:
362-
params = super()._get_onedal_params(X, n_neighbors=n_neighbors)
363-
prediction_results = self._onedal_predict(self._onedal_model, X, params)
364-
distances = from_table(prediction_results.distances)
365-
indices = from_table(prediction_results.indices)
326+
params = super()._get_onedal_params(X, n_neighbors=n_neighbors)
327+
prediction_results = self._onedal_predict(self._onedal_model, X, params)
328+
distances = from_table(prediction_results.distances)
329+
indices = from_table(prediction_results.indices)
366330

367331
if method == "kd_tree":
368332
for i in range(distances.shape[0]):
@@ -445,43 +409,21 @@ def train(self, *args, **kwargs): ...
445409
@bind_default_backend("neighbors.classification")
446410
def infer(self, *args, **kwargs): ...
447411

448-
def _get_daal_params(self, data):
449-
params = super()._get_daal_params(data)
450-
params["resultsToEvaluate"] = "computeClassLabels"
451-
params["resultsToCompute"] = ""
452-
return params
453-
454412
def _onedal_fit(self, X, y):
455413
# global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
456414
queue = QM.get_global_queue()
457-
gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
458-
if self.effective_metric_ == "euclidean" and not gpu_device:
459-
params = self._get_daal_params(X)
460-
if self._fit_method == "brute":
461-
train_alg = bf_knn_classification_training
462-
463-
else:
464-
train_alg = kdtree_knn_classification_training
465-
466-
return train_alg(**params).compute(X, y).model
467-
else:
468-
params = self._get_onedal_params(X, y)
469-
X_table, y_table = to_table(X, y, queue=queue)
470-
return self.train(params, X_table, y_table).model
415+
params = self._get_onedal_params(X, y)
416+
X_table, y_table = to_table(X, y, queue=queue)
417+
return self.train(params, X_table, y_table).model
471418

472419
def _onedal_predict(self, model, X, params):
473-
if type(self._onedal_model) is kdtree_knn_classification_model:
474-
return kdtree_knn_classification_prediction(**params).compute(X, model)
475-
elif type(self._onedal_model) is bf_knn_classification_model:
476-
return bf_knn_classification_prediction(**params).compute(X, model)
477-
else:
478-
X = to_table(X, queue=QM.get_global_queue())
479-
if "responses" not in params["result_option"]:
480-
params["result_option"] += "|responses"
481-
params["fptype"] = X.dtype
482-
result = self.infer(params, model, X)
420+
X = to_table(X, queue=QM.get_global_queue())
421+
if "responses" not in params["result_option"]:
422+
params["result_option"] += "|responses"
423+
params["fptype"] = X.dtype
424+
result = self.infer(params, model, X)
483425

484-
return result
426+
return result
485427

486428
@supports_queue
487429
def fit(self, X, y, queue=None):
@@ -513,17 +455,9 @@ def predict(self, X, queue=None):
513455

514456
self._validate_n_classes()
515457

516-
if (
517-
type(onedal_model) is kdtree_knn_classification_model
518-
or type(onedal_model) is bf_knn_classification_model
519-
):
520-
params = self._get_daal_params(X)
521-
prediction_result = self._onedal_predict(onedal_model, X, params)
522-
responses = prediction_result.prediction
523-
else:
524-
params = self._get_onedal_params(X)
525-
prediction_result = self._onedal_predict(onedal_model, X, params)
526-
responses = from_table(prediction_result.responses)
458+
params = self._get_onedal_params(X)
459+
prediction_result = self._onedal_predict(onedal_model, X, params)
460+
responses = from_table(prediction_result.responses)
527461

528462
result = self.classes_.take(np.asarray(responses.ravel(), dtype=np.intp))
529463
return result
@@ -605,25 +539,10 @@ def train(self, *args, **kwargs): ...
605539
@bind_default_backend("neighbors.regression")
606540
def infer(self, *args, **kwargs): ...
607541

608-
def _get_daal_params(self, data):
609-
params = super()._get_daal_params(data)
610-
params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
611-
params["resultsToEvaluate"] = "none"
612-
return params
613-
614542
def _onedal_fit(self, X, y):
615543
# global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
616544
queue = QM.get_global_queue()
617545
gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
618-
if self.effective_metric_ == "euclidean" and not gpu_device:
619-
params = self._get_daal_params(X)
620-
if self._fit_method == "brute":
621-
train_alg = bf_knn_classification_training
622-
else:
623-
train_alg = kdtree_knn_classification_training
624-
625-
return train_alg(**params).compute(X, y).model
626-
627546
X_table, y_table = to_table(X, y, queue=queue)
628547
params = self._get_onedal_params(X_table, y)
629548

@@ -635,11 +554,6 @@ def _onedal_fit(self, X, y):
635554
def _onedal_predict(self, model, X, params):
636555
assert self._onedal_model is not None, "Model is not trained"
637556

638-
if type(model) is kdtree_knn_classification_model:
639-
return kdtree_knn_classification_prediction(**params).compute(X, model)
640-
elif type(model) is bf_knn_classification_model:
641-
return bf_knn_classification_prediction(**params).compute(X, model)
642-
643557
# global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
644558
queue = QM.get_global_queue()
645559
gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
@@ -754,39 +668,14 @@ def train(self, *args, **kwargs): ...
754668
@bind_default_backend("neighbors.search")
755669
def infer(self, *arg, **kwargs): ...
756670

757-
def _get_daal_params(self, data):
758-
params = super()._get_daal_params(data)
759-
params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
760-
params["resultsToEvaluate"] = (
761-
"none" if getattr(self, "_y", None) is None else "computeClassLabels"
762-
)
763-
return params
764-
765671
def _onedal_fit(self, X, y):
766672
# global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
767673
queue = QM.get_global_queue()
768-
gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
769-
if self.effective_metric_ == "euclidean" and not gpu_device:
770-
params = self._get_daal_params(X)
771-
if self._fit_method == "brute":
772-
train_alg = bf_knn_classification_training
773-
774-
else:
775-
train_alg = kdtree_knn_classification_training
776-
777-
return train_alg(**params).compute(X, y).model
778-
779-
else:
780-
params = self._get_onedal_params(X, y)
781-
X, y = to_table(X, y, queue=queue)
782-
return self.train(params, X).model
674+
params = self._get_onedal_params(X, y)
675+
X, y = to_table(X, y, queue=queue)
676+
return self.train(params, X).model
783677

784678
def _onedal_predict(self, model, X, params):
785-
if type(self._onedal_model) is kdtree_knn_classification_model:
786-
return kdtree_knn_classification_prediction(**params).compute(X, model)
787-
elif type(self._onedal_model) is bf_knn_classification_model:
788-
return bf_knn_classification_prediction(**params).compute(X, model)
789-
790679
X = to_table(X, queue=QM.get_global_queue())
791680

792681
params["fptype"] = X.dtype

sklearnex/tests/test_common.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,41 @@
103103
"LogisticRegression(solver='newton-cg')-predict-n_jobs_check": "uses daal4py for cpu in sklearnex",
104104
"LogisticRegression(solver='newton-cg')-predict_log_proba-n_jobs_check": "uses daal4py for cpu in sklearnex",
105105
"LogisticRegression(solver='newton-cg')-predict_proba-n_jobs_check": "uses daal4py for cpu in sklearnex",
106+
# Neighbors estimators (KNeighborsClassifier, KNeighborsRegressor, NearestNeighbors, LocalOutlierFactor): validate_data issues - will be fixed later
107+
"KNeighborsClassifier-fit-call_validate_data": "validate_data implementation needs fixing",
108+
"KNeighborsClassifier-predict_proba-call_validate_data": "validate_data implementation needs fixing",
109+
"KNeighborsClassifier-score-call_validate_data": "validate_data implementation needs fixing",
110+
"KNeighborsClassifier-kneighbors-call_validate_data": "validate_data implementation needs fixing",
111+
"KNeighborsClassifier-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
112+
"KNeighborsClassifier-predict-call_validate_data": "validate_data implementation needs fixing",
113+
"KNeighborsRegressor-fit-call_validate_data": "validate_data implementation needs fixing",
114+
"KNeighborsRegressor-score-call_validate_data": "validate_data implementation needs fixing",
115+
"KNeighborsRegressor-kneighbors-call_validate_data": "validate_data implementation needs fixing",
116+
"KNeighborsRegressor-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
117+
"KNeighborsRegressor-predict-call_validate_data": "validate_data implementation needs fixing",
118+
"NearestNeighbors-fit-call_validate_data": "validate_data implementation needs fixing",
119+
"NearestNeighbors-kneighbors-call_validate_data": "validate_data implementation needs fixing",
120+
"NearestNeighbors-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
121+
"LocalOutlierFactor-fit-call_validate_data": "validate_data implementation needs fixing",
122+
"LocalOutlierFactor-kneighbors-call_validate_data": "validate_data implementation needs fixing",
123+
"LocalOutlierFactor-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
124+
"LocalOutlierFactor(novelty=True)-fit-call_validate_data": "validate_data implementation needs fixing",
125+
"LocalOutlierFactor(novelty=True)-kneighbors-call_validate_data": "validate_data implementation needs fixing",
126+
"LocalOutlierFactor(novelty=True)-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
127+
"KNeighborsClassifier(algorithm='brute')-fit-call_validate_data": "validate_data implementation needs fixing",
128+
"KNeighborsClassifier(algorithm='brute')-predict_proba-call_validate_data": "validate_data implementation needs fixing",
129+
"KNeighborsClassifier(algorithm='brute')-score-call_validate_data": "validate_data implementation needs fixing",
130+
"KNeighborsClassifier(algorithm='brute')-kneighbors-call_validate_data": "validate_data implementation needs fixing",
131+
"KNeighborsClassifier(algorithm='brute')-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
132+
"KNeighborsClassifier(algorithm='brute')-predict-call_validate_data": "validate_data implementation needs fixing",
133+
"KNeighborsRegressor(algorithm='brute')-fit-call_validate_data": "validate_data implementation needs fixing",
134+
"KNeighborsRegressor(algorithm='brute')-score-call_validate_data": "validate_data implementation needs fixing",
135+
"KNeighborsRegressor(algorithm='brute')-kneighbors-call_validate_data": "validate_data implementation needs fixing",
136+
"KNeighborsRegressor(algorithm='brute')-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
137+
"KNeighborsRegressor(algorithm='brute')-predict-call_validate_data": "validate_data implementation needs fixing",
138+
"NearestNeighbors(algorithm='brute')-fit-call_validate_data": "validate_data implementation needs fixing",
139+
"NearestNeighbors(algorithm='brute')-kneighbors-call_validate_data": "validate_data implementation needs fixing",
140+
"NearestNeighbors(algorithm='brute')-kneighbors_graph-call_validate_data": "validate_data implementation needs fixing",
106141
}
107142

108143

0 commit comments

Comments
 (0)