Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
82fc26d
refactor: move/delete some methods in neighbors.py
yuejiaointel Oct 6, 2025
325753c
fix: try it again
yuejiaointel Oct 6, 2025
d17bb34
fix: try it again
yuejiaointel Oct 6, 2025
0e8b4c6
fix: try it again
yuejiaointel Oct 6, 2025
9dda937
fix: first round of refactor move preprocssing function to sklearnex
yuejiaointel Oct 6, 2025
8bd86c2
fix: fix shape
yuejiaointel Oct 7, 2025
debfcdf
rebase: rebase to main
yuejiaointel Oct 7, 2025
e9e7306
fix: add fit emthod logic in onedla
yuejiaointel Oct 7, 2025
02da9e9
fix: fix test
yuejiaointel Oct 7, 2025
62c8ddd
fix: fix tupleerror
yuejiaointel Oct 8, 2025
fc296b5
fix: fix tuple issue
yuejiaointel Oct 9, 2025
fe0abbb
print: print fit_x
yuejiaointel Oct 10, 2025
e202e65
fix: fixed tuple
yuejiaointel Oct 10, 2025
649fc5d
fix: fix tuple
yuejiaointel Oct 10, 2025
a1f95f1
print: print in save attributes
yuejiaointel Oct 10, 2025
939a4f6
fix: tuple handling
yuejiaointel Oct 10, 2025
a4b1351
print: add print
yuejiaointel Oct 10, 2025
39ae6c5
print: test print
yuejiaointel Oct 10, 2025
aa98829
test: test fix for typle
yuejiaointel Oct 13, 2025
2f834d0
fix: more print
yuejiaointel Oct 13, 2025
dcf5b43
fix: test fix for tuyple issue
yuejiaointel Oct 13, 2025
9c65647
fix: test fix for tuyple issue
yuejiaointel Oct 13, 2025
b33834d
fix: try add validation
yuejiaointel Oct 13, 2025
96762db
fix: try restore neighbors funcitons
yuejiaointel Oct 14, 2025
cc2293c
fix: test restore
yuejiaointel Oct 14, 2025
19fe8ce
fix: restore again
yuejiaointel Oct 14, 2025
0f37c1b
fix: restpore
yuejiaointel Oct 14, 2025
f984c42
fix: restore ad and add print
yuejiaointel Oct 14, 2025
f372bcb
fix: restore ad and add print
yuejiaointel Oct 14, 2025
169df26
fix: fix test as well
yuejiaointel Oct 14, 2025
2a2a800
fix: fix test
yuejiaointel Oct 14, 2025
4377198
fix: comment out validate data
yuejiaointel Oct 14, 2025
50f9b9d
fix: refactoredclassifier prepressing to sklearnex
yuejiaointel Oct 14, 2025
833f7ab
fix: add vlaidate data and see if it fix attributeerror
yuejiaointel Oct 14, 2025
a2af2ef
fix: fix onedal test
yuejiaointel Oct 14, 2025
0b601f9
fix: dpm
yuejiaointel Oct 14, 2025
97f9bd1
fix: refacto validate n classes
yuejiaointel Oct 14, 2025
e5300ca
fix: refacor kneighbors validation
yuejiaointel Oct 15, 2025
ae590e9
fix: add vlaidation data to rest of the functions
yuejiaointel Oct 15, 2025
0a2850e
fix: fix check n neighbors validation before check is fitted
yuejiaointel Oct 15, 2025
24bd02d
fix: fix when predict(none) is called by adding x is not none check
yuejiaointel Oct 15, 2025
2702322
fix: fix lof
yuejiaointel Oct 15, 2025
965389e
fix: add validation in kneihbors for lof
yuejiaointel Oct 15, 2025
5b8b091
fix: remove count valitation in onedal
yuejiaointel Oct 15, 2025
5e54b86
fix: refactor shape
yuejiaointel Oct 15, 2025
b16ecc8
refactor: neighbors processing logic to skleranex
yuejiaointel Oct 15, 2025
8c89422
fix: validationeighbors < samples after +1
yuejiaointel Oct 15, 2025
273a084
fix: fix assertion error
yuejiaointel Oct 16, 2025
35afada
fix: fix asswertion error by dispatch gpu/skl in sklearnex
yuejiaointel Oct 16, 2025
8cccb1d
refacor: onedal prediciton entirely to sklearnex
yuejiaointel Oct 16, 2025
5e01257
feature: array api in common.py
yuejiaointel Oct 16, 2025
8bec3dc
fix: assertion error
yuejiaointel Oct 17, 2025
bbab97a
feature: add array api support to knn skleranex files
yuejiaointel Oct 18, 2025
aab0100
fix: compatiibilty for array api
yuejiaointel Oct 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
746 changes: 420 additions & 326 deletions onedal/neighbors/neighbors.py

Large diffs are not rendered by default.

39 changes: 33 additions & 6 deletions onedal/neighbors/tests/test_knn_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,58 @@
from numpy.testing import assert_array_equal
from sklearn import datasets

from onedal.neighbors import KNeighborsClassifier
# REFACTOR: Import from sklearnex instead of onedal
# Classification processing now happens in sklearnex layer
from sklearnex.neighbors import KNeighborsClassifier
from onedal.tests.utils._device_selection import get_queues


@pytest.mark.parametrize("queue", get_queues())
def test_iris(queue):
    """Fit a 2-neighbor classifier on iris and check accuracy and class order."""
    # `queue` is kept only so the test stays parametrized over the available
    # queues; it is not forwarded to the estimator's fit/score calls.
    iris = datasets.load_iris()
    clf = KNeighborsClassifier(2).fit(iris.data, iris.target)
    assert clf.score(iris.data, iris.target) > 0.9
    # classes_ must come back already sorted.
    assert_array_equal(clf.classes_, np.sort(clf.classes_))


@pytest.mark.parametrize("queue", get_queues())
def test_pickle(queue):
    """Check that a fitted classifier predicts identically after a pickle round trip."""
    # `queue` is used only for parametrization and for the GPU skip below; it
    # is not forwarded to the estimator's fit/predict calls.
    if queue and queue.sycl_device.is_gpu:
        pytest.skip("KNN classifier pickling for the GPU sycl_queue is buggy.")

    iris = datasets.load_iris()
    clf = KNeighborsClassifier(2).fit(iris.data, iris.target)
    expected = clf.predict(iris.data)

    import pickle

    # Round-trip through an in-memory pickle of an object created in this test.
    dump = pickle.dumps(clf)
    clf2 = pickle.loads(dump)

    # The restored object must be exactly the same class, not a subclass.
    assert type(clf2) is type(clf)
    result = clf2.predict(iris.data)
    assert_array_equal(expected, result)
55 changes: 50 additions & 5 deletions sklearnex/neighbors/_lof.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from sklearnex.neighbors.knn_unsupervised import NearestNeighbors

from ..utils._array_api import get_namespace
from ..utils.validation import check_feature_names
from ..utils.validation import check_feature_names, validate_data


@control_n_jobs(decorated_methods=["fit", "kneighbors", "_kneighbors"])
Expand All @@ -53,9 +53,18 @@ class LocalOutlierFactor(KNeighborsDispatchingBase, _sklearn_LocalOutlierFactor)
_onedal_kneighbors = NearestNeighbors._onedal_kneighbors

def _onedal_fit(self, X, y, queue=None):
import sys
print(f"DEBUG LocalOutlierFactor._onedal_fit START: X type={type(X)}, y type={type(y)}", file=sys.stderr)
if sklearn_check_version("1.2"):
self._validate_params()

# REFACTOR: Use validate_data from sklearnex.utils.validation to convert pandas to numpy
X = validate_data(
self, X, dtype=[np.float64, np.float32], accept_sparse="csr"
)
print(f"DEBUG: After validate_data, X type={type(X)}", file=sys.stderr)

print(f"DEBUG LocalOutlierFactor._onedal_fit: Calling _onedal_knn_fit", file=sys.stderr)
self._onedal_knn_fit(X, y, queue=queue)

if self.contamination != "auto":
Expand All @@ -75,6 +84,7 @@ def _onedal_fit(self, X, y, queue=None):
)
self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))

print(f"DEBUG LocalOutlierFactor._onedal_fit: Calling _onedal_kneighbors", file=sys.stderr)
(
self._distances_fit_X_,
_neighbors_indices_fit_X_,
Expand Down Expand Up @@ -109,9 +119,12 @@ def _onedal_fit(self, X, y, queue=None):
"Increase the number of neighbors for more accurate results."
)

print(f"DEBUG LocalOutlierFactor._onedal_fit END: _fit_X type={type(getattr(self, '_fit_X', 'NOT_SET'))}", file=sys.stderr)
return self

def fit(self, X, y=None):
import sys
print(f"DEBUG LocalOutlierFactor.fit START: X type={type(X)}, X shape={getattr(X, 'shape', 'NO_SHAPE')}", file=sys.stderr)
result = dispatch(
self,
"fit",
Expand All @@ -122,9 +135,12 @@ def fit(self, X, y=None):
X,
None,
)
print(f"DEBUG LocalOutlierFactor.fit END: result type={type(result)}", file=sys.stderr)
return result

def _predict(self, X=None):
import sys
print(f"DEBUG LocalOutlierFactor._predict START: X type={type(X)}", file=sys.stderr)
check_is_fitted(self)

if X is not None:
Expand All @@ -136,6 +152,7 @@ def _predict(self, X=None):
is_inlier = np.ones(self.n_samples_fit_, dtype=int)
is_inlier[self.negative_outlier_factor_ < self.offset_] = -1

print(f"DEBUG LocalOutlierFactor._predict END: is_inlier type={type(is_inlier)}", file=sys.stderr)
return is_inlier

# This had to be done because predict loses the queue when no
Expand All @@ -146,13 +163,28 @@ def _predict(self, X=None):
@wraps(_sklearn_LocalOutlierFactor.fit_predict, assigned=["__doc__"])
@wrap_output_data
def fit_predict(self, X, y=None):
    # No docstring here on purpose: the decorators above this def copy
    # __doc__ from scikit-learn's LocalOutlierFactor.fit_predict.
    # `y` is accepted for signature compatibility and is not passed on.
    # Fit on X, then call the private `_predict` with no argument so the
    # labels are computed for the data the estimator was just fitted on.
    return self.fit(X)._predict()

def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
import sys
print(f"DEBUG LocalOutlierFactor._kneighbors START: X type={type(X)}, n_neighbors={n_neighbors}, return_distance={return_distance}", file=sys.stderr)

# Validate n_neighbors parameter first (before check_is_fitted)
if n_neighbors is not None:
self._validate_n_neighbors(n_neighbors)

check_is_fitted(self)
if X is not None:
check_feature_names(self, X, reset=False)
return dispatch(

# Validate kneighbors parameters (inherited from KNeighborsDispatchingBase)
self._kneighbors_validation(X, n_neighbors)

result = dispatch(
self,
"kneighbors",
{
Expand All @@ -163,14 +195,25 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
n_neighbors=n_neighbors,
return_distance=return_distance,
)
print(f"DEBUG LocalOutlierFactor._kneighbors END: result type={type(result)}", file=sys.stderr)
return result

kneighbors = wrap_output_data(_kneighbors)

@available_if(_sklearn_LocalOutlierFactor._check_novelty_score_samples)
@wraps(_sklearn_LocalOutlierFactor.score_samples, assigned=["__doc__"])
@wrap_output_data
def score_samples(self, X):
import sys
print(f"DEBUG LocalOutlierFactor.score_samples START: X type={type(X)}", file=sys.stderr)
check_is_fitted(self)

# Validate and convert X (pandas to numpy if needed)
X = validate_data(
self, X, dtype=[np.float64, np.float32], accept_sparse="csr", reset=False
)

check_feature_names(self, X, reset=False)

distances_X, neighbors_indices_X = self._kneighbors(
X, n_neighbors=self.n_neighbors_
Expand All @@ -183,7 +226,9 @@ def score_samples(self, X):

lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]

return -np.mean(lrd_ratios_array, axis=1)
result = -np.mean(lrd_ratios_array, axis=1)
print(f"DEBUG LocalOutlierFactor.score_samples END: result type={type(result)}", file=sys.stderr)
return result

fit.__doc__ = _sklearn_LocalOutlierFactor.fit.__doc__
kneighbors.__doc__ = _sklearn_LocalOutlierFactor.kneighbors.__doc__
kneighbors.__doc__ = _sklearn_LocalOutlierFactor.kneighbors.__doc__
Loading
Loading