
Commit 43596ee

Authored by samir-nasibli, icfaust, david-cortes-intel, and ahuber21
ENH: Array API dispatching (#2096)
* ENH: array api dispatching; added array-api-compat to test env
* Deselect some scikit-learn Array API tests
* deselect more tests
* deselect more tests
* disabled tests for
* fix the deselection comment
* disabled test for Ridge regression
* Disabled tests and added comment
* ENH: Array API dispatching
* Revert adding dpctl into Array API conformance testing; added versioning for get_namespace
* minor refactoring of onedal _array_api
* add tests
* addressed memory usage tests
* Address some array api test fails
* linting
* addressed test_get_namespace
* adding test case for validate_data check with Array API inputs
* minor refactoring
* addressed test_patch_map_match fail
* Added docstrings for get_namespace
* docstrings for Array API tests
* updated minimal scikit-learn version for Array API dispatching
* updated minimal scikit-learn version for Array API dispatching in _device_offload.py and _array_api.py
* fix test test_get_namespace_with_config_context
* refactor onedal/datatypes/_data_conversion.py
* correction for array api
* Update conftest.py
* introduce tags
* fix imports
* see if this works
* really lazy logic introduction
* introduce IntelEstimator
* missing change in knn
* reconfigure logic
* strip out dpnp/dpctl special code, will come back to it later
* switchover
* Update __init__.py
* Update test_array_api.py
* merge main into PR
* try to fix changes:
* add test:
* attempt to get things running
* remove tsne failure
* try again
* add sklearn_check_version
* oops
* update
* another try
* try again
* Update test_common.py (x5)
* fixes
* attempt to re-introduce changes
* Update _device_offload.py
* switch get_tags
* merge main into branch
* remove leftover code
* Update base.py
* formatting
* correct bad sklearn recommendation
* remove irrelevant new tests
* fix mistake
* lint fix
* add fallback test
* make test fixes
* formatting
* rejigger the logic again
* Update test_config.py
* interim fix
* Update test_config.py
* attempt to get test_config test to skip
* fix test
* interim fixes
* fixes for IncrementalEmpiricalCovariance
* fixes for wrap_output_data
* solve issues related to older numpy
* Update test_run_to_run_stability.py
* Update __init__.py (x2)
* Update test_run_to_run_stability.py
* Update base.py
* Update test_run_to_run_stability.py
* Update test_config.py
* Update test_patching.py (x3)
* Update k_means.py (x3)
* Update _array_api.py (x2)
* Update _device_offload.py (x4)
* Update test_patching.py (x3)
* local verify doclinks
* fix doclink
* fixes
* remove print statement
* fix changes from scikit-learn/scikit-learn#29774
* Update sklearnex/_device_offload.py (Co-authored-by: david-cortes-intel <[email protected]>)
* add type hints and fix docs for dispatch
* remove change from local testing
* be more explicit with a type
* python3.9 fix
* change language which was bothering me
* formatting
* This should fix it (I think)
* Apply suggestions from code review (Co-authored-by: david-cortes-intel <[email protected]>)
* Update base.py
* Update __init__.py
* remove hack on config
* Apply suggestions from code review (Co-authored-by: david-cortes-intel <[email protected]>)
* make suggested changes
* fix import in test_patching
* Update test_patching.py
* Update build-and-test-lnx.yml (x3)
* Apply suggestions from code review (Co-authored-by: david-cortes-intel <[email protected]>)
* Apply suggestions from code review (Co-authored-by: Andreas Huber <[email protected]>, david-cortes-intel <[email protected]>)
* formatting and ABC change
* fix gpu test
* remove workaround

---------

Co-authored-by: Faust, Ian <[email protected]>
Co-authored-by: Ian Faust <[email protected]>
Co-authored-by: david-cortes-intel <[email protected]>
Co-authored-by: Andreas Huber <[email protected]>
1 parent ae4febb commit 43596ee

34 files changed (+585 / -293 lines)

deselected_tests.yaml

Lines changed: 8 additions & 0 deletions
@@ -309,6 +309,14 @@ deselected_tests:
   # Failure occurs in python3.9 on windows CPU only - not easy to reproduce
   - ensemble/tests/test_weight_boosting.py::test_estimator >= 1.4 win32

+  # array_api dispatching for oneDAL is recognized using the sklearn tag system. This is an expected
+  # break in sklearn conformance, as additional non-default tags cause errors in this test. This
+  # non-conformance does not impact the verification system of sklearn. A new system (sklearn >=1.6)
+  # allows for public tag addition. Sklearn also acknowledges the abuse of the old system by others,
+  # meaning it's not too impactful to use it so long as the default keys and value types set by sklearn
+  # are respected.
+  - tests/test_common.py::test_valid_tag_types <1.6

   # --------------------------------------------------------
   # No need to test daal4py patching
 reduced_tests:
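For context, the two tag mechanisms the deselection comment above refers to can be sketched as follows; the estimator below is hypothetical and only illustrates the pattern, it is not part of this commit:

    from sklearn.base import BaseEstimator

    class ArrayAPICapableEstimator(BaseEstimator):
        # sklearn < 1.6: tags are a private dict; adding a non-default key such
        # as "onedal_array_api" works in practice but trips the type check in
        # tests/test_common.py::test_valid_tag_types, hence the deselection.
        def _more_tags(self):
            return {"onedal_array_api": True}

        # sklearn >= 1.6: tags are public objects returned by __sklearn_tags__(),
        # so capabilities can be declared without breaking conformance tests.
        def __sklearn_tags__(self):
            tags = super().__sklearn_tags__()
            tags.array_api_support = True
            return tags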

onedal/_device_offload.py

Lines changed: 2 additions & 2 deletions
@@ -198,9 +198,9 @@ def wrapper_impl(*args, **kwargs):
                 result = _convert_to_dpnp(result)
             return result

-        if not get_config().get("transform_output"):
+        if get_config().get("transform_output") in ("default", None):
             input_array_api = getattr(data[0], "__array_namespace__", lambda: None)()
-            if input_array_api:
+            if input_array_api and not _is_numpy_namespace(input_array_api):
                 input_array_api_device = data[0].device
                 result = _asarray(result, input_array_api, device=input_array_api_device)
         return result
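Sketched below is the intended effect of this change, assuming an array-API-conformant input (array_api_strict stands in for the caller's namespace; the variable names here are illustrative, not from the module):

    import numpy as np
    import array_api_strict as xp  # assumption: any array-API namespace works

    data = (xp.asarray([1.0, 2.0]),)  # original input to the wrapped function
    result = np.asarray([3.0, 4.0])   # host (numpy) result of the computation

    # Convert back only when transform_output is unset or "default" and the
    # input namespace is a genuine array-API namespace rather than numpy.
    ns = getattr(data[0], "__array_namespace__", lambda: None)()
    if ns is not None and "numpy" not in ns.__name__:
        result = ns.asarray(result, device=data[0].device)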

onedal/utils/_array_api.py

Lines changed: 11 additions & 1 deletion
@@ -40,9 +40,19 @@ def _convert_to_dpnp(array):
     return array


+def _supports_buffer_protocol(obj):
+    # the array_api standard mandates conversion with the buffer protocol,
+    # which can only be checked via a try-catch in native python
+    try:
+        memoryview(obj)
+    except TypeError:
+        return False
+    return True
+
+
 def _asarray(data, xp, *args, **kwargs):
     """Converted input object to array format of xp namespace provided."""
-    if hasattr(data, "__array_namespace__"):
+    if hasattr(data, "__array_namespace__") or _supports_buffer_protocol(data):
         return xp.asarray(data, *args, **kwargs)
     elif isinstance(data, Iterable):
         if isinstance(data, tuple):
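A quick standalone illustration of the helper's behavior (the same try/except logic, repeated here purely for demonstration):

    import numpy as np

    def _supports_buffer_protocol(obj):
        # memoryview() succeeds only for objects exporting the buffer protocol
        try:
            memoryview(obj)
        except TypeError:
            return False
        return True

    assert _supports_buffer_protocol(b"raw bytes")   # bytes export buffers
    assert _supports_buffer_protocol(np.zeros(3))    # numpy arrays do as well
    assert not _supports_buffer_protocol([1, 2, 3])  # plain lists do not, so
                                                     # they still take the
                                                     # Iterable branch of _asarray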

onedal/utils/_sycl_queue_manager.py

Lines changed: 16 additions & 2 deletions
@@ -26,6 +26,11 @@

 SyclQueue = getattr(_dpc_backend, "SyclQueue", None)

+# This special object signifies that the queue system should be
+# disabled. It will force computation to host. This occurs when the
+# global queue is set to this value (and therefore should not be
+# modified).
+__fallback_queue = object()
 # single instance of global queue
 __global_queue = None

@@ -46,8 +51,11 @@ def __create_sycl_queue(target):
 def get_global_queue():
     """Get the global queue. Retrieve it from the config if not set."""
     if (queue := __global_queue) is not None:
-        if SyclQueue and not isinstance(queue, SyclQueue):
-            raise ValueError("Global queue is not a SyclQueue object.")
+        if SyclQueue:
+            if queue is __fallback_queue:
+                return None
+            elif not isinstance(queue, SyclQueue):
+                raise ValueError("Global queue is not a SyclQueue object.")
         return queue

     target = _get_config()["target_offload"]

@@ -73,6 +81,12 @@ def update_global_queue(queue):
     __global_queue = queue


+def fallback_to_host():
+    """Enforce a host queue."""
+    global __global_queue
+    __global_queue = __fallback_queue
+
+
 def from_data(*data):
     """Extract the queue from provided data. This updates the global queue as well."""
     for item in data:
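The sentinel pattern introduced above, shown in isolation (a minimal sketch with shortened names; the real module uses __fallback_queue and __global_queue):

    _FALLBACK = object()  # module-private sentinel, compared only by identity
    _queue = None

    def fallback_to_host():
        global _queue
        _queue = _FALLBACK

    def get_queue():
        # The sentinel is distinguishable from both None ("nothing set yet,
        # derive a queue from config") and a real SyclQueue, so a forced host
        # run can be reported as None without erasing the fact that it was forced.
        if _queue is _FALLBACK:
            return None
        return _queue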

sklearnex/_device_offload.py

Lines changed: 140 additions & 89 deletions
@@ -14,126 +14,177 @@
 # limitations under the License.
 # ==============================================================================

+from collections.abc import Callable
 from functools import wraps
+from typing import Any, Union

 from onedal._device_offload import _copy_to_usm, _transfer_to_host
 from onedal.utils import _sycl_queue_manager as QM
-from onedal.utils._array_api import _asarray
+from onedal.utils._array_api import _asarray, _is_numpy_namespace
 from onedal.utils._dpep_helpers import dpnp_available

 if dpnp_available:
     import dpnp
     from onedal.utils._array_api import _convert_to_dpnp

-from ._config import get_config
-
-
-def _get_backend(obj, queue, method_name, *data):
-    with QM.manage_global_queue(queue, *data) as queue:
-        cpu_device = queue is None or getattr(queue.sycl_device, "is_cpu", True)
-        gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
-
-        if cpu_device:
-            patching_status = obj._onedal_cpu_supported(method_name, *data)
-            if patching_status.get_status():
-                return "onedal", patching_status
-            else:
-                return "sklearn", patching_status
+from ._config import config_context, get_config, set_config
+from ._utils import PatchingConditionsChain, get_tags
+from .base import oneDALEstimator
+
+
+def _get_backend(
+    obj: type[oneDALEstimator], method_name: str, *data
+) -> tuple[Union[bool, None], PatchingConditionsChain]:
+    """This function verifies the hardware conditions, data characteristics, and
+    estimator parameters necessary for offloading computation to oneDAL. The status
+    of this patching is returned as a PatchingConditionsChain object along with a
+    boolean flag signaling whether the computation can be offloaded to oneDAL or not.
+    It is assumed that the queue (which determines what hardware to possibly use for
+    oneDAL) has been previously and extensively collected (i.e. the data has already
+    been checked using onedal's SyclQueueManager for queues)."""
+    queue = QM.get_global_queue()
+    cpu_device = queue is None or getattr(queue.sycl_device, "is_cpu", True)
+    gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False)
+
+    if cpu_device:
+        patching_status = obj._onedal_cpu_supported(method_name, *data)
+        return patching_status.get_status(), patching_status
+
+    if gpu_device:
+        patching_status = obj._onedal_gpu_supported(method_name, *data)
+        if (
+            not patching_status.get_status()
+            and (config := get_config())["allow_fallback_to_host"]
+        ):
+            QM.fallback_to_host()
+            return None, patching_status
+        return patching_status.get_status(), patching_status
+
+    raise RuntimeError("Device support is not implemented for the supplied data type.")
+
+
+if "array_api_dispatch" in get_config():
+    _array_api_offload = lambda: get_config()["array_api_dispatch"]
+else:
+    _array_api_offload = lambda: False
+
+
+def dispatch(
+    obj: type[oneDALEstimator],
+    method_name: str,
+    branches: dict[Callable, Callable],
+    *args,
+    **kwargs,
+) -> Any:
+    """Dispatch object method call to oneDAL if conditionally possible.
+    Depending on support conditions, oneDAL will be called; otherwise it will
+    fall back to calling scikit-learn. Dispatching to oneDAL can be influenced
+    by the 'use_raw_input' or 'allow_fallback_to_host' config parameters.
+
+    Parameters
+    ----------
+    obj : object
+        sklearnex object which inherits from oneDALEstimator and contains
+        ``onedal_cpu_supported`` and ``onedal_gpu_supported`` methods which
+        evaluate oneDAL support.
+
+    method_name : string
+        name of the method to be evaluated for oneDAL support
+
+    branches : dict
+        dictionary containing the functions to be called. Only the keys
+        'sklearn' and 'onedal' are used, which should contain the relevant
+        scikit-learn and onedal object methods respectively. All functions
+        should accept the inputs from *args and **kwargs. Additionally, the
+        onedal object method must accept a 'queue' keyword.
+
+    *args : tuple
+        arguments to be supplied to the dispatched method
+
+    **kwargs : dict
+        keyword arguments to be supplied to the dispatched method
+
+    Returns
+    -------
+    unknown : object
+        Returned object dependent on the supplied branches. Implicitly the
+        returned object types should match for the sklearn and onedal object
+        methods.
+    """

-        allow_fallback_to_host = get_config()["allow_fallback_to_host"]
+    if get_config()["use_raw_input"]:
+        return branches["onedal"](obj, *args, **kwargs)

-        if gpu_device:
-            patching_status = obj._onedal_gpu_supported(method_name, *data)
-            if patching_status.get_status():
-                return "onedal", patching_status
-            else:
-                QM.remove_global_queue()
-                if allow_fallback_to_host:
-                    patching_status = obj._onedal_cpu_supported(method_name, *data)
-                    if patching_status.get_status():
-                        return "onedal", patching_status
-                    else:
-                        return "sklearn", patching_status
-                else:
-                    return "sklearn", patching_status
-
-    raise RuntimeError("Device support is not implemented")
-
-
-def get_array_api_support_tag(estimator):
-    """Gets the value of the 'array_api_support' tag from the estimator
-    using the correct code path depending on the scikit-learn version."""
-    if hasattr(estimator, "__sklearn_tags__"):
-        return estimator.__sklearn_tags__().array_api_support
-    elif hasattr(estimator, "_get_tags"):
-        tags = estimator._get_tags()
-        if "array_api_support" in tags:
-            return tags["array_api_support"]
-    return False
-
-
-def dispatch(obj, method_name, branches, *args, **kwargs):
-    if get_config()["use_raw_input"] is False:
-        with QM.manage_global_queue(None, *args) as queue:
-            has_usm_data_for_args, hostargs = _transfer_to_host(*args)
-            has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values())
-            hostkwargs = dict(zip(kwargs.keys(), hostvalues))
-
-            backend, patching_status = _get_backend(obj, queue, method_name, *hostargs)
-            has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs
-            if backend == "onedal":
-                # Host args only used before onedal backend call.
-                # Device will be offloaded when onedal backend will be called.
+    # Determine if array_api dispatching is enabled and if the estimator is capable
+    onedal_array_api = _array_api_offload() and get_tags(obj).onedal_array_api
+    sklearn_array_api = _array_api_offload() and get_tags(obj).array_api_support
+
+    # backend can only be a boolean or None; None signifies an unverified backend
+    backend: "bool | None" = None
+
+    # The config context needs to be saved, as the sycl_queue_manager interacts with
+    # target_offload, which can regenerate a GPU queue later on. Therefore, if a
+    # fallback occurs, the state of target_offload must be set to default so that
+    # later use of get_global_queue only sends to host. We must modify the target
+    # offload settings, but we must also restore the original value at the end,
+    # hence the need for a contextmanager.
+    with QM.manage_global_queue(None, *args):
+        if onedal_array_api:
+            backend, patching_status = _get_backend(obj, method_name, *args)
+            if backend:
+                queue = QM.get_global_queue()
                 patching_status.write_log(queue=queue, transferred_to_host=False)
-                return branches[backend](obj, *hostargs, **hostkwargs, queue=queue)
-            if backend == "sklearn":
-                if (
-                    "array_api_dispatch" in get_config()
-                    and get_config()["array_api_dispatch"]
-                    and get_array_api_support_tag(obj)
-                    and not has_usm_data
-                ):
-                    # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is
-                    # not compliant with the Array API standard, and DPCTL usm_ndarray Array API is compliant,
-                    # except for the linalg module. There is no guarantee that stock scikit-learn will
-                    # work with such input data. The condition will be updated after DPNP.ndarray and
-                    # DPCTL usm_ndarray enabling for conformance testing and these arrays supportance
-                    # of the fallback cases.
-                    # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn,
-                    # then raw inputs are used for the fallback.
-                    patching_status.write_log(transferred_to_host=False)
-                    return branches[backend](obj, *args, **kwargs)
-                else:
-                    patching_status.write_log()
-                    return branches[backend](obj, *hostargs, **hostkwargs)
-            raise RuntimeError(
-                f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}"
-            )
-    else:
-        return branches["onedal"](obj, *args, **kwargs)
+                return branches["onedal"](obj, *args, **kwargs, queue=queue)
+            elif sklearn_array_api and backend is False:
+                patching_status.write_log(transferred_to_host=False)
+                return branches["sklearn"](obj, *args, **kwargs)
+
+        # Move the data to host for any of several reasons: an array_api fallback
+        # to host, oneDAL code that does not support array_api, or issues with
+        # usm support in sklearn.
+        has_usm_data_for_args, hostargs = _transfer_to_host(*args)
+        has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values())
+
+        hostkwargs = dict(zip(kwargs.keys(), hostvalues))
+        has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs
+
+        while backend is None:
+            backend, patching_status = _get_backend(obj, method_name, *hostargs)
+
+        if backend:
+            queue = QM.get_global_queue()
+            patching_status.write_log(queue=queue, transferred_to_host=False)
+            return branches["onedal"](obj, *hostargs, **hostkwargs, queue=queue)
+        else:
+            if sklearn_array_api and not has_usm_data:
+                # dpnp fallback is not handled properly yet.
+                patching_status.write_log(transferred_to_host=False)
+                return branches["sklearn"](obj, *args, **kwargs)
+            else:
+                patching_status.write_log()
+                return branches["sklearn"](obj, *hostargs, **hostkwargs)


-def wrap_output_data(func):
+def wrap_output_data(func: Callable) -> Callable:
     """
     Converts and moves the output arrays of the decorated function
     to match the input array type and device.
     """

     @wraps(func)
-    def wrapper(self, *args, **kwargs):
+    def wrapper(self, *args, **kwargs) -> Any:
         result = func(self, *args, **kwargs)
         if not (len(args) == 0 and len(kwargs) == 0):
             data = (*args, *kwargs.values())
+
             usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None)
             if usm_iface is not None:
                 result = _copy_to_usm(usm_iface["syclobj"], result)
                 if dpnp_available and isinstance(data[0], dpnp.ndarray):
                     result = _convert_to_dpnp(result)
                 return result
-            config = get_config()
-            if not ("transform_output" in config and config["transform_output"]):
+
+            if get_config().get("transform_output") in ("default", None):
                 input_array_api = getattr(data[0], "__array_namespace__", lambda: None)()
-                if input_array_api:
+                if input_array_api and not _is_numpy_namespace(input_array_api):
                     input_array_api_device = data[0].device
                     result = _asarray(
                         result, input_array_api, device=input_array_api_device