Skip to content

Commit fc0f8f1

Browse files
authored
[kNN] Default d4p usage for classification and search (#972)
* use d4p (daal4py) for euclidean metrics
* several fixes
* chebyshev and cosine distances enabled on GPU
* fix pep8
* fix parameter for parsing auto method
1 parent 2653ead commit fc0f8f1

File tree

4 files changed

+112
-22
lines changed

4 files changed

+112
-22
lines changed

onedal/neighbors/neighbors.py

Lines changed: 102 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@
2828
_num_samples
2929
)
3030

31+
from daal4py import (
32+
bf_knn_classification_training,
33+
bf_knn_classification_prediction,
34+
kdtree_knn_classification_training,
35+
kdtree_knn_classification_prediction
36+
)
3137
from onedal import _backend
3238

3339
from ..common._mixin import ClassifierMixin
@@ -43,7 +49,7 @@ def _parse_auto_method(self, method, n_samples, n_features):
4349
if (method in ['auto', 'ball_tree']):
4450
condition = self.n_neighbors is not None and \
4551
self.n_neighbors >= n_samples // 2
46-
if self.metric == 'precomputed' or n_features > 11 or condition:
52+
if self.metric == 'precomputed' or n_features > 15 or condition:
4753
result_method = 'brute'
4854
else:
4955
if self.metric == 'euclidean':
@@ -128,6 +134,22 @@ def _get_onedal_params(self, data):
128134
'result_option': 'indices|distances',
129135
}
130136

137+
def _get_daal_params(self, data):
138+
class_count = 0 if self.classes_ is None else len(self.classes_)
139+
weights = getattr(self, 'weights', 'uniform')
140+
params = {
141+
'fptype': 'float' if data.dtype is np.dtype('float32') else 'double',
142+
'method': 'defaultDense',
143+
'k': self.n_neighbors,
144+
'voteWeights': 'voteUniform' if weights == 'uniform' else 'voteDistance',
145+
'resultsToCompute': 'computeIndicesOfNeighbors|computeDistances',
146+
'resultsToEvaluate': 'none' if getattr(self, '_y', None) is None
147+
else 'computeClassLabels'
148+
}
149+
if class_count != 0:
150+
params['nClasses'] = class_count
151+
return params
152+
131153

132154
class NeighborsBase(NeighborsCommonBase, metaclass=ABCMeta):
133155
def __init__(self, n_neighbors=None, radius=None,
@@ -220,7 +242,7 @@ def _fit(self, X, y, queue):
220242
if y is not None and _is_regressor(self):
221243
self._y = y if self._shape is None else y.reshape(self._shape)
222244

223-
self._onedal_model = result.model
245+
self._onedal_model = result
224246
result = self
225247

226248
return result
@@ -272,12 +294,22 @@ def _kneighbors(self, X=None, n_neighbors=None,
272294
chunked_results = None
273295
method = super()._parse_auto_method(
274296
self._fit_method, self.n_samples_fit_, n_features)
275-
params = super()._get_onedal_params(X)
297+
298+
gpu_device = queue is not None and queue.sycl_device.is_gpu
299+
if self.effective_metric_ == 'euclidean' and not gpu_device:
300+
params = super()._get_daal_params(X)
301+
else:
302+
params = super()._get_onedal_params(X)
303+
276304
prediction_results = self._onedal_predict(
277305
self._onedal_model, X, params, queue=queue)
278306

279-
distances = from_table(prediction_results.distances)
280-
indices = from_table(prediction_results.indices)
307+
if self.effective_metric_ == 'euclidean' and not gpu_device:
308+
distances = prediction_results.distances
309+
indices = prediction_results.indices
310+
else:
311+
distances = from_table(prediction_results.distances)
312+
indices = from_table(prediction_results.indices)
281313

282314
if method == 'kd_tree':
283315
for i in range(distances.shape[0]):
@@ -348,15 +380,41 @@ def _get_onedal_params(self, data):
348380
params['result_option'] = 'responses'
349381
return params
350382

383+
def _get_daal_params(self, data):
    """Return the daal4py parameters used by the classification estimator.

    Starts from the parent class parameters for ``data`` and overrides the
    result options so that only class labels are evaluated and no
    neighbor indices/distances are requested.
    """
    overrides = {
        'resultsToEvaluate': 'computeClassLabels',
        'resultsToCompute': '',
    }
    daal_params = super()._get_daal_params(data)
    for option, value in overrides.items():
        daal_params[option] = value
    return daal_params
388+
351389
def _onedal_fit(self, X, y, queue):
390+
gpu_device = queue is not None and queue.sycl_device.is_gpu
391+
if self.effective_metric_ == 'euclidean' and not gpu_device:
392+
params = self._get_daal_params(X)
393+
if self._fit_method == 'brute':
394+
train_alg = bf_knn_classification_training
395+
396+
else:
397+
train_alg = kdtree_knn_classification_training
398+
399+
return train_alg(**params).compute(X, y).model
400+
352401
policy = _get_policy(queue, X, y)
353402
params = self._get_onedal_params(X)
354403
train_alg = _backend.neighbors.classification.train(policy, params,
355404
*to_table(X, y))
356405

357-
return train_alg
406+
return train_alg.model
358407

359408
def _onedal_predict(self, model, X, params, queue):
409+
gpu_device = queue is not None and queue.sycl_device.is_gpu
410+
if self.effective_metric_ == 'euclidean' and not gpu_device:
411+
if self._fit_method == 'brute':
412+
predict_alg = bf_knn_classification_prediction
413+
414+
else:
415+
predict_alg = kdtree_knn_classification_prediction
416+
417+
return predict_alg(**params).compute(X, model)
360418
policy = _get_policy(queue, X)
361419

362420
if hasattr(self, '_onedal_model'):
@@ -390,10 +448,17 @@ def predict(self, X, queue=None):
390448

391449
self._validate_n_classes()
392450

393-
params = self._get_onedal_params(X)
451+
gpu_device = queue is not None and queue.sycl_device.is_gpu
452+
if self.effective_metric_ == 'euclidean' and not gpu_device:
453+
params = self._get_daal_params(X)
454+
else:
455+
params = self._get_onedal_params(X)
394456

395457
prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue)
396-
responses = from_table(prediction_result.responses)
458+
if self.effective_metric_ == 'euclidean' and not gpu_device:
459+
responses = prediction_result.prediction
460+
else:
461+
responses = from_table(prediction_result.responses)
397462
result = self.classes_.take(
398463
np.asarray(responses.ravel(), dtype=np.intp))
399464

@@ -458,15 +523,43 @@ def _get_onedal_params(self, data):
458523
params['result_option'] = 'indices|distances'
459524
return params
460525

526+
def _get_daal_params(self, data):
    """Return the daal4py parameters used by the neighbors-search estimator.

    Starts from the parent class parameters for ``data``, always requests
    neighbor indices and distances, and evaluates class labels only when
    the estimator has a non-None ``_y`` attribute.
    """
    daal_params = super()._get_daal_params(data)
    if getattr(self, '_y', None) is None:
        to_evaluate = 'none'
    else:
        to_evaluate = 'computeClassLabels'
    daal_params['resultsToCompute'] = 'computeIndicesOfNeighbors|computeDistances'
    daal_params['resultsToEvaluate'] = to_evaluate
    return daal_params
532+
461533
def _onedal_fit(self, X, y, queue):
534+
gpu_device = queue is not None and queue.sycl_device.is_gpu
535+
if self.effective_metric_ == 'euclidean' and not gpu_device:
536+
params = self._get_daal_params(X)
537+
if self._fit_method == 'brute':
538+
train_alg = bf_knn_classification_training
539+
540+
else:
541+
train_alg = kdtree_knn_classification_training
542+
543+
return train_alg(**params).compute(X, y).model
544+
462545
policy = _get_policy(queue, X, y)
463546
params = self._get_onedal_params(X)
464547
train_alg = _backend.neighbors.search.train(policy, params,
465548
to_table(X))
466549

467-
return train_alg
550+
return train_alg.model
468551

469552
def _onedal_predict(self, model, X, params, queue):
553+
gpu_device = queue is not None and queue.sycl_device.is_gpu
554+
if self.effective_metric_ == 'euclidean' and not gpu_device:
555+
if self._fit_method == 'brute':
556+
predict_alg = bf_knn_classification_prediction
557+
558+
else:
559+
predict_alg = kdtree_knn_classification_prediction
560+
561+
return predict_alg(**params).compute(X, model)
562+
470563
policy = _get_policy(queue, X)
471564

472565
if hasattr(self, '_onedal_model'):

sklearnex/dispatcher.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from .svm import NuSVC as NuSVC_sklearnex
3434

3535
from .neighbors import KNeighborsClassifier as KNeighborsClassifier_sklearnex
36-
# from .neighbors import KNeighborsRegressor as KNeighborsRegressor_sklearnex
3736
from .neighbors import NearestNeighbors as NearestNeighbors_sklearnex
3837

3938
new_patching_available = True
@@ -65,17 +64,11 @@ def get_patch_map():
6564
# kNN
6665
mapping.pop('knn_classifier')
6766
mapping.pop('kneighborsclassifier')
68-
# TODO: make kNN regression patching through onedal ifaces
69-
# mapping.pop('knn_regressor')
70-
# mapping.pop('kneighborsregressor')
7167
mapping.pop('nearest_neighbors')
7268
mapping.pop('nearestneighbors')
7369
mapping['knn_classifier'] = [[(neighbors_module,
7470
'KNeighborsClassifier',
7571
KNeighborsClassifier_sklearnex), None]]
76-
# mapping['knn_regressor'] = [[(neighbors_module,
77-
# 'KNeighborsRegressor',
78-
# KNeighborsRegressor_sklearnex), None]]
7972
mapping['nearest_neighbors'] = [[(neighbors_module,
8073
'NearestNeighbors',
8174
NearestNeighbors_sklearnex), None]]

sklearnex/neighbors/knn_classification.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ def _onedal_gpu_supported(self, method_name, *data):
282282
if self._fit_method in ['auto', 'ball_tree']:
283283
condition = self.n_neighbors is not None and \
284284
self.n_neighbors >= self.n_samples_fit_ // 2
285-
if self.n_features_in_ > 11 or condition:
285+
if self.n_features_in_ > 15 or condition:
286286
result_method = 'brute'
287287
else:
288288
if self.effective_metric_ in ['euclidean']:
@@ -306,7 +306,9 @@ def _onedal_gpu_supported(self, method_name, *data):
306306
is_valid_for_brute = result_method in ['brute'] and \
307307
self.effective_metric_ in ['manhattan',
308308
'minkowski',
309-
'euclidean']
309+
'euclidean',
310+
'chebyshev',
311+
'cosine']
310312
is_valid_weights = self.weights in ['uniform', "distance"]
311313
main_condition = is_valid_for_brute and not is_sparse and \
312314
is_single_output and is_valid_weights
@@ -328,7 +330,7 @@ def _onedal_cpu_supported(self, method_name, *data):
328330
if self._fit_method in ['auto', 'ball_tree']:
329331
condition = self.n_neighbors is not None and \
330332
self.n_neighbors >= self.n_samples_fit_ // 2
331-
if self.n_features_in_ > 11 or condition:
333+
if self.n_features_in_ > 15 or condition:
332334
result_method = 'brute'
333335
else:
334336
if self.effective_metric_ in ['euclidean']:

sklearnex/neighbors/knn_unsupervised.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def _onedal_gpu_supported(self, method_name, *data):
230230
if self._fit_method in ['auto', 'ball_tree']:
231231
condition = self.n_neighbors is not None and \
232232
self.n_neighbors >= self.n_samples_fit_ // 2
233-
if self.n_features_in_ > 11 or condition:
233+
if self.n_features_in_ > 15 or condition:
234234
result_method = 'brute'
235235
else:
236236
if self.effective_metric_ in ['euclidean']:
@@ -244,7 +244,9 @@ def _onedal_gpu_supported(self, method_name, *data):
244244
is_valid_for_brute = result_method in ['brute'] and \
245245
self.effective_metric_ in ['manhattan',
246246
'minkowski',
247-
'euclidean']
247+
'euclidean',
248+
'chebyshev',
249+
'cosine']
248250
main_condition = is_valid_for_brute and not is_sparse
249251

250252
if method_name == 'neighbors.NearestNeighbors.fit':
@@ -262,7 +264,7 @@ def _onedal_cpu_supported(self, method_name, *data):
262264
if self._fit_method in ['auto', 'ball_tree']:
263265
condition = self.n_neighbors is not None and \
264266
self.n_neighbors >= self.n_samples_fit_ // 2
265-
if self.n_features_in_ > 11 or condition:
267+
if self.n_features_in_ > 15 or condition:
266268
result_method = 'brute'
267269
else:
268270
if self.effective_metric_ in ['euclidean']:

0 commit comments

Comments
 (0)