uxlfoundation
diff --git a/‎onedal/neighbors/neighbors.py‎
Lines changed: 28 additions & 155 deletions b/‎onedal/neighbors/neighbors.py‎
Lines changed: 28 additions & 155 deletions
diff --git a/‎sklearnex/neighbors/common.py‎
Lines changed: 110 additions & 2 deletions b/‎sklearnex/neighbors/common.py‎
Lines changed: 110 additions & 2 deletions
@@ -65,18 +65,9 @@ def _get_onedal_params(self, X, y=None, n_neighbors=None):
         try:
             fptype = X.dtype
         except AttributeError:
-            # For pandas DataFrames or other types without dtype attribute
-            import numpy as np
-
             fptype = np.float64
 
-        # _fit_method should be set by sklearnex level before calling oneDAL
-        if not hasattr(self, "_fit_method") or self._fit_method is None:
-            raise ValueError(
-                "_fit_method must be set by sklearnex level before calling oneDAL. "
-                "This indicates improper usage - oneDAL neighbors should not be called directly."
-            )
-
+        # _fit_method should be validated at sklearnex level before calling oneDAL
         return {
             "fptype": fptype,
             "vote_weights": "uniform" if weights == "uniform" else "distance",
@@ -109,77 +100,35 @@ def __init__(
         self.metric_params = metric_params
 
     def _fit(self, X, y):
+        """Fit the oneDAL neighbors model on ``X`` (and ``y`` when it is used).
+
+        Resets ``_onedal_model`` and ``_tree``, records ``n_samples_fit_``,
+        ``n_features_in_`` and ``_fit_X``, calls ``self._onedal_fit`` and
+        stores its result in ``self._onedal_model``. ``y`` is only forwarded
+        for classifiers, or for regressors when the global queue is a GPU
+        device; it is then cast to ``X.dtype`` and reshaped to one column.
+
+        Returns ``self``.
+        """
+        # Basic initialization - all validation and preprocessing should be done at sklearnex level
         self._onedal_model = None
         self._tree = None
-        self._shape = None
-        self.classes_ = None
-        self.effective_metric_ = getattr(self, "effective_metric_", self.metric)
-        self.effective_metric_params_ = getattr(
-            self, "effective_metric_params_", self.metric_params
-        )
-
-        _, xp, _ = _get_sycl_namespace(X)
-        if y is not None or self.requires_y:
-            shape = getattr(y, "shape", None)
-            self._shape = shape if shape is not None else y.shape
-
-            if _is_classifier(self):
-                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
-                    self.outputs_2d_ = False
-                    y = y.reshape((-1, 1))
-                else:
-                    self.outputs_2d_ = True
-
-                self.classes_ = []
-                self._y = np.empty(y.shape, dtype=int)
-                for k in range(self._y.shape[1]):
-                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
-                    self.classes_.append(classes)
-
-                if not self.outputs_2d_:
-                    self.classes_ = self.classes_[0]
-                    self._y = self._y.ravel()
-            else:
-                self._y = y
 
+        # Set basic fitted attributes
         self.n_samples_fit_ = X.shape[0]
         self.n_features_in_ = X.shape[1]
         self._fit_X = X
 
+        # Prepare y for oneDAL (classification/regression handling done at sklearnex level)
         _fit_y = None
         queue = QM.get_global_queue()
         gpu_device = queue is not None and queue.sycl_device.is_gpu
 
         if _is_classifier(self) or (_is_regressor(self) and gpu_device):
             _fit_y = y.astype(X.dtype).reshape((-1, 1)) if y is not None else None
+        
         result = self._onedal_fit(X, _fit_y)
-
-        if y is not None and _is_regressor(self):
-            self._y = y if self._shape is None else xp.reshape(y, self._shape)
-
         self._onedal_model = result
-        result = self
-
-        return result
+        
+        return self
 
     def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         _check_is_fitted(self)
 
+        # All validation and preprocessing should be done at sklearnex level
         if n_neighbors is None:
             n_neighbors = self.n_neighbors
 
-        if X is not None:
-            query_is_train = False
-        else:
-            query_is_train = True
-            X = self._fit_X
-            # Include an extra neighbor to account for the sample itself being
-            # returned, which is removed later
-            n_neighbors += 1
-
-        n_samples_fit = self.n_samples_fit_
-
-        chunked_results = None
         # Use the fit method determined at sklearnex level
         method = getattr(self, "_fit_method", "brute")
 
@@ -188,52 +137,18 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         distances = from_table(prediction_results.distances)
         indices = from_table(prediction_results.indices)
 
+        # Sort results for kd_tree method
         if method == "kd_tree":
             for i in range(distances.shape[0]):
                 seq = distances[i].argsort()
                 indices[i] = indices[i][seq]
                 distances[i] = distances[i][seq]
 
+        # Return raw results - all post-processing done at sklearnex level
         if return_distance:
-            results = distances, indices
-        else:
-            results = indices
-
-        if chunked_results is not None:
-            if return_distance:
-                neigh_dist, neigh_ind = zip(*chunked_results)
-                results = np.vstack(neigh_dist), np.vstack(neigh_ind)
-            else:
-                results = np.vstack(chunked_results)
-
-        if not query_is_train:
-            return results
-
-        # If the query data is the same as the indexed data, we would like
-        # to ignore the first nearest neighbor of every sample, i.e
-        # the sample itself.
-        if return_distance:
-            neigh_dist, neigh_ind = results
+            return distances, indices
         else:
-            neigh_ind = results
-
-        n_queries, _ = X.shape
-        sample_range = np.arange(n_queries)[:, None]
-        sample_mask = neigh_ind != sample_range
-
-        # Corner case: When the number of duplicates are more
-        # than the number of neighbors, the first NN will not
-        # be the sample, but a duplicate.
-        # In that case mask the first duplicate.
-        dup_gr_nbrs = np.all(sample_mask, axis=1)
-        sample_mask[:, 0][dup_gr_nbrs] = False
-
-        neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))
-
-        if return_distance:
-            neigh_dist = np.reshape(neigh_dist[sample_mask], (n_queries, n_neighbors - 1))
-            return neigh_dist, neigh_ind
-        return neigh_ind
+            return indices
 
 
 class KNeighborsClassifier(NeighborsBase, ClassifierMixin):
@@ -303,40 +218,11 @@ def predict(self, X, queue=None):
 
     @supports_queue
     def predict_proba(self, X, queue=None):
+        """Always raise ``NotImplementedError``.
+
+        No probabilities are computed here; the error message directs callers
+        to ``sklearnex.neighbors.KNeighborsClassifier``.
+        """
-        neigh_dist, neigh_ind = self.kneighbors(X, queue=queue)
-
-        classes_ = self.classes_
-        _y = self._y
-        if not self.outputs_2d_:
-            _y = self._y.reshape((-1, 1))
-            classes_ = [self.classes_]
-
-        n_queries = _num_samples(X)
-
-        # Use uniform weights for now - weights calculation should be done at sklearnex level
-        weights = np.ones_like(neigh_ind)
-
-        all_rows = np.arange(n_queries)
-        probabilities = []
-        for k, classes_k in enumerate(classes_):
-            pred_labels = _y[:, k][neigh_ind]
-            proba_k = np.zeros((n_queries, classes_k.size))
-
-            # a simple ':' index doesn't work right
-            for i, idx in enumerate(pred_labels.T):  # loop is O(n_neighbors)
-                proba_k[all_rows, idx] += weights[:, i]
-
-            # normalize 'votes' into real [0,1] probabilities
-            normalizer = proba_k.sum(axis=1)[:, np.newaxis]
-            normalizer[normalizer == 0.0] = 1.0
-            proba_k /= normalizer
-
-            probabilities.append(proba_k)
-
-        if not self.outputs_2d_:
-            probabilities = probabilities[0]
-
-        return probabilities
+        # This method should not be called directly - weights processing moved to sklearnex level
+        raise NotImplementedError(
+            "predict_proba weights processing moved to sklearnex level. "
+            "Use sklearnex.neighbors.KNeighborsClassifier instead."
+        )
 
     @supports_queue
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
@@ -427,38 +313,25 @@ def _predict_gpu(self, X):
         return result
 
     def _predict_skl(self, X):
+        """Always raise ``NotImplementedError``.
+
+        No prediction is computed here; the error message directs callers to
+        ``sklearnex.neighbors.KNeighborsRegressor``.
+        """
-        neigh_dist, neigh_ind = self.kneighbors(X)
-
-        # Use uniform weights for now - weights calculation should be done at sklearnex level
-        weights = None
-
-        _y = self._y
-        if _y.ndim == 1:
-            _y = _y.reshape((-1, 1))
-
-        if weights is None:
-            y_pred = np.mean(_y[neigh_ind], axis=1)
-        else:
-            y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float64)
-            denom = np.sum(weights, axis=1)
-
-            for j in range(_y.shape[1]):
-                num = np.sum(_y[neigh_ind, j] * weights, axis=1)
-                y_pred[:, j] = num / denom
-
-        if self._y.ndim == 1:
-            y_pred = y_pred.ravel()
-
-        return y_pred
+        # This method should not be called directly - weights processing moved to sklearnex level
+        raise NotImplementedError(
+            "Regression weights processing moved to sklearnex level. "
+            "Use sklearnex.neighbors.KNeighborsRegressor instead."
+        )
 
     @supports_queue
     def predict(self, X, queue=None):
+        """Predict with oneDAL when ``queue`` is a GPU and weights are uniform.
+
+        In that case the result of ``self._predict_gpu(X)`` is returned. In
+        every other case (no queue, non-GPU queue, or non-uniform weights)
+        ``NotImplementedError`` is raised.
+        """
+        # For GPU with uniform weights, use direct oneDAL prediction
         gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
         is_uniform_weights = getattr(self, "weights", "uniform") == "uniform"
         if gpu_device and is_uniform_weights:
             return self._predict_gpu(X)
         else:
-            return self._predict_skl(X)
+            # Weights processing should be handled at sklearnex level
+            raise NotImplementedError(
+                "Regression weights processing moved to sklearnex level. "
+                "Use sklearnex.neighbors.KNeighborsRegressor instead."
+            )
 
 
 class NearestNeighbors(NeighborsBase):
 
@@ -140,6 +140,116 @@ def _validate_n_neighbors(self, n_neighbors):
                     "enter integer value" % type(n_neighbors)
                 )
 
+    def _validate_weights(self, weights):
+        """Reject ``weights`` values that are not a supported option.
+
+        Accepted values are ``None``, ``"uniform"``, ``"distance"`` or any
+        callable. Anything else raises ``ValueError``.
+        """
+        is_known_option = weights in (None, "uniform", "distance")
+        if is_known_option or callable(weights):
+            return
+        raise ValueError(
+            "weights not recognized: should be 'uniform', "
+            "'distance', or a callable function"
+        )
+
+    def _validate_fit_method(self, fit_method):
+        """Ensure ``self._fit_method`` is set before oneDAL is called.
+
+        Only the ``_fit_method`` attribute on ``self`` is inspected; the
+        ``fit_method`` argument is accepted but not read here.
+
+        Raises ``ValueError`` when the attribute is missing or ``None``.
+        """
+        if getattr(self, "_fit_method", None) is not None:
+            return
+        raise ValueError(
+            "_fit_method must be set by sklearnex level before calling oneDAL. "
+            "This indicates improper usage - oneDAL neighbors should not be called directly."
+        )
+
+    def _validate_kneighbors_params(self, n_neighbors, X=None):
+        """Validate the ``n_neighbors`` argument of a ``kneighbors`` call.
+
+        An explicit ``n_neighbors`` is first passed to
+        ``self._validate_n_neighbors``. When ``self.n_samples_fit_`` exists,
+        the requested count (falling back to ``self.n_neighbors``) must not
+        exceed it, otherwise ``ValueError`` is raised.
+
+        NOTE(review): ``X`` is accepted but not read, so no separate bound is
+        applied when the query is the training data - confirm that is intended.
+        """
+        if n_neighbors is not None:
+            self._validate_n_neighbors(n_neighbors)
+
+        # The upper bound can only be checked once fit data has been recorded.
+        if not hasattr(self, "n_samples_fit_"):
+            return
+
+        requested = self.n_neighbors if n_neighbors is None else n_neighbors
+        if requested > self.n_samples_fit_:
+            raise ValueError(
+                "Expected n_neighbors <= n_samples_fit, but n_samples_fit = %d, "
+                "n_neighbors = %d" % (self.n_samples_fit_, requested)
+            )
+
+    def _process_kneighbors_results(self, results, X, n_neighbors, return_distance, query_is_train=None):
+        """Post-process raw kneighbors output, dropping each sample's self-match.
+
+        Parameters
+        ----------
+        results : array or tuple of arrays
+            ``indices`` when ``return_distance`` is false, otherwise
+            ``(distances, indices)``.
+        X : array-like or None
+            Query data. When ``query_is_train`` is not given, ``X is None``
+            means the query is the training data.
+        n_neighbors : int or None
+            Kept for interface compatibility. The output width is derived
+            from ``results`` itself, so this value no longer has to match the
+            number of result columns.
+        return_distance : bool
+            Whether ``results`` carries distances alongside indices.
+        query_is_train : bool or None
+            Overrides the ``X is None`` check when given.
+
+        Returns
+        -------
+        ``results`` unchanged when the query is not the training data.
+        Otherwise the same structure with one column fewer: each row loses
+        the entry equal to its own row index, or its first entry when the
+        row does not contain its own index.
+        """
+        if query_is_train is None:
+            query_is_train = X is None
+
+        if not query_is_train:
+            return results
+
+        if return_distance:
+            neigh_dist, neigh_ind = results
+        else:
+            neigh_dist, neigh_ind = None, results
+
+        n_queries = X.shape[0] if X is not None else self._fit_X.shape[0]
+        sample_mask = neigh_ind != np.arange(n_queries)[:, None]
+
+        # Corner case: when a sample has more duplicates than requested
+        # neighbors, its own index may be missing from the row; drop the
+        # first (closest) entry instead so every row loses exactly one item.
+        # Assumes a row never lists the same sample twice.
+        dup_gr_nbrs = np.all(sample_mask, axis=1)
+        sample_mask[:, 0][dup_gr_nbrs] = False
+
+        # Size the output from the data rather than from the caller-supplied
+        # count, which may be None or not yet incremented for the self-match.
+        n_kept = neigh_ind.shape[1] - 1
+        neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_kept))
+        if not return_distance:
+            return neigh_ind
+
+        neigh_dist = np.reshape(neigh_dist[sample_mask], (n_queries, n_kept))
+        return neigh_dist, neigh_ind
+
+    def _compute_weights(self, distances, weights_param):
+        """Turn neighbor distances into voting weights.
+
+        Parameters
+        ----------
+        distances : ndarray
+            Neighbor distances, one row per query. Object-dtype arrays are
+            processed row by row. The input is never modified.
+        weights_param : None, "uniform", "distance" or callable
+            Weighting scheme.
+
+        Returns
+        -------
+        ``None`` for uniform weighting; the callable's return value for a
+        callable; otherwise inverse distances, where any row containing a
+        zero distance gets weight 1.0 on its zero-distance entries and 0.0
+        elsewhere.
+
+        Raises
+        ------
+        ValueError
+            If ``weights_param`` is none of the supported options.
+        """
+        if weights_param in (None, "uniform"):
+            return None
+
+        if weights_param == "distance":
+            if distances.dtype is np.dtype(object):
+                # Write into a fresh array so the caller's distances survive.
+                weights = np.empty_like(distances)
+                for i, dist_row in enumerate(distances):
+                    zero_mask = dist_row == 0.0
+                    if np.any(zero_mask):
+                        weights[i] = zero_mask.astype(np.float64)
+                    else:
+                        weights[i] = 1.0 / dist_row
+                return weights
+
+            zero_mask = distances == 0.0
+            # Zero distances yield inf here; those rows are replaced below.
+            with np.errstate(divide="ignore"):
+                weights = 1.0 / distances
+            rows_with_zero = np.any(zero_mask, axis=-1, keepdims=True)
+            # A query that coincides with training points gives them all the
+            # weight (1.0) and every other neighbor none (0.0).
+            return np.where(rows_with_zero, zero_mask.astype(weights.dtype), weights)
+
+        if callable(weights_param):
+            return weights_param(distances)
+
+        raise ValueError(
+            "weights not recognized: should be 'uniform', "
+            "'distance', or a callable function"
+        )
+
     def _validate_feature_count(self, X, method_name=""):
         n_features = getattr(self, "n_features_in_", None)
         shape = getattr(X, "shape", None)
@@ -168,8 +278,6 @@ def _validate_kneighbors_bounds(self, n_neighbors, query_is_train, X):
 
     def _process_classification_targets(self, y):
         """Process classification targets and set class-related attributes."""
-        import numpy as np
-
         # Handle shape processing
         shape = getattr(y, "shape", None)
         self._shape = shape if shape is not None else y.shape