diff --git a/.ci/scripts/run_sklearn_tests.sh b/.ci/scripts/run_sklearn_tests.sh
index 2c9234627b..d1a9650c4f 100755
--- a/.ci/scripts/run_sklearn_tests.sh
+++ b/.ci/scripts/run_sklearn_tests.sh
@@ -29,8 +29,11 @@
 export DESELECT_FLAGS="--public ${DESELECT_FLAGS}"
 if [ -n "${SKLEARNEX_PREVIEW}" ]; then
     export DESELECT_FLAGS="--preview ${DESELECT_FLAGS}"
 fi
-export DESELECTED_TESTS=$(python ../.circleci/deselect_tests.py ../deselected_tests.yaml ${DESELECT_FLAGS})
+if [ "$1" == "gpu" ]; then
+    export DESELECT_FLAGS="--gpu ${DESELECT_FLAGS}"
+fi
+export DESELECTED_TESTS=$(python ../.circleci/deselect_tests.py ../deselected_tests.yaml ${DESELECT_FLAGS})
 # manual setting of OCL_ICD_FILENAMES is required in
 # specific MSYS environment with conda packages downloaded from intel channel
 if [[ "$(uname)" =~ "MSYS" ]] && [ -z "${OCL_ICD_FILENAMES}" ] && [ -n "${CONDA_PREFIX}" ]; then
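The new branch forwards the script's first positional argument as a `--gpu` deselection flag. `deselect_tests.py` itself is not part of this diff; the sketch below only illustrates the flag-to-section mapping it implies (all helper names here are hypothetical), where `--gpu` additionally activates the `gpu:` section of `deselected_tests.yaml` that this patch extends further down.

```python
# Hypothetical sketch of deselect_tests.py's flag handling; the real
# script is not shown in this diff.
import sys

import yaml


def build_deselects(yaml_path: str, gpu: bool = False) -> list[str]:
    with open(yaml_path) as f:
        config = yaml.safe_load(f)
    # The base deselections always apply; "--gpu" additionally pulls in
    # the "gpu:" section of deselected_tests.yaml.
    entries = list(config.get("deselected_tests") or [])
    if gpu:
        entries += config.get("gpu") or []
    args = []
    for entry in entries:
        # Entries may carry a trailing sklearn version specifier
        # (e.g. " >=1.4"); only the test id is passed to pytest.
        args += ["--deselect", entry.split(" ")[0]]
    return args


if __name__ == "__main__":
    print(" ".join(build_deselects(sys.argv[1], gpu="--gpu" in sys.argv[2:])))
```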
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c3a231191c..3120e232dd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,9 +33,35 @@ env:
   DPCTL_VERSION: 0.18.1
   DPNP_VERSION: 0.16.0
   DPCTL_PY_VERSIONS: '3.9\|3.11'
+  UXL_PYTHONVERSION: "3.12"
+  UXL_SKLEARNVERSION: "1.4"
+  ONEDAL_REPO: "uxlfoundation/oneDAL"
 
 jobs:
+
+  onedal_nightly:
+    runs-on: ubuntu-24.04
+    name: Identify oneDAL nightly
+    timeout-minutes: 2
+
+    steps:
+      - name: Get run ID of "Nightly-build" workflow
+        id: get-run-id
+        run: |
+          WF_NAME="Nightly-build"
+          JQ_QUERY='map(select(.event == "workflow_dispatch" or .event == "schedule")) | .[0].databaseId'
+          RUN_ID=`gh run --repo ${{ env.ONEDAL_REPO }} list --workflow "${WF_NAME}" --json databaseId,event --status success --jq "${JQ_QUERY}"`
+          echo "Detected latest run id of ${RUN_ID} for workflow ${WF_NAME}"
+          echo "run-id=${RUN_ID}" >> "$GITHUB_OUTPUT"
+        env:
+          GH_TOKEN: ${{ github.token }}
+    outputs:
+      run-id: ${{ steps.get-run-id.outputs.run-id }}
+      uxl-python: ${{ env.UXL_PYTHONVERSION }}
+      uxl-sklearn: ${{ env.UXL_SKLEARNVERSION }}
+
   sklearn_lnx:
+    needs: onedal_nightly
     strategy:
       fail-fast: false
       matrix:
@@ -46,7 +72,7 @@ jobs:
           SKLEARN_VERSION: "1.2"
         - PYTHON_VERSION: "3.11"
           SKLEARN_VERSION: "1.3"
-    name: LinuxNightly/pip Python${{ matrix.PYTHON_VERSION }}_Sklearn${{ matrix.SKLEARN_VERSION }}
+    name: LinuxNightly/venv Python${{ matrix.PYTHON_VERSION }}_Sklearn${{ matrix.SKLEARN_VERSION }}
     runs-on: ubuntu-24.04
     timeout-minutes: 120
 
@@ -57,32 +83,21 @@
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.PYTHON_VERSION }}
-      - name: Get run ID of "Nightly-build" workflow
-        id: get-run-id
-        run: |
-          OTHER_REPO="uxlfoundation/oneDAL"
-          WF_NAME="Nightly-build"
-          JQ_QUERY='map(select(.event == "workflow_dispatch" or .event == "schedule")) | .[0].databaseId'
-          RUN_ID=`gh run --repo ${OTHER_REPO} list --workflow "${WF_NAME}" --json databaseId,event --status success --jq "${JQ_QUERY}"`
-          echo "Detected latest run id of ${RUN_ID} for workflow ${WF_NAME}"
-          echo "run-id=${RUN_ID}" >> "$GITHUB_OUTPUT"
-        env:
-          GH_TOKEN: ${{ github.token }}
       - name: Download oneDAL build artifact
         uses: actions/download-artifact@v4
         with:
           name: __release_lnx
           github-token: ${{ github.token }}
-          repository: uxlfoundation/oneDAL
-          run-id: ${{ steps.get-run-id.outputs.run-id }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
           path: ./__release_lnx
       - name: Download oneDAL environment artifact
         uses: actions/download-artifact@v4
         with:
           name: oneDAL_env
           github-token: ${{ github.token }}
-          repository: uxlfoundation/oneDAL
-          run-id: ${{ steps.get-run-id.outputs.run-id }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
           path: .ci/env
       - name: Set Environment Variables
         id: set-env
@@ -161,6 +176,7 @@ jobs:
         bash .ci/scripts/run_sklearn_tests.sh $CPU
 
   sklearn_win:
+    needs: onedal_nightly
     strategy:
       fail-fast: false
       matrix:
@@ -171,7 +187,7 @@ jobs:
           SKLEARN_VERSION: "1.2"
         - PYTHON_VERSION: "3.11"
           SKLEARN_VERSION: "1.3"
-    name: WindowsNightly/pip Python${{ matrix.PYTHON_VERSION }}_Sklearn${{ matrix.SKLEARN_VERSION }}
+    name: WindowsNightly/venv Python${{ matrix.PYTHON_VERSION }}_Sklearn${{ matrix.SKLEARN_VERSION }}
     runs-on: windows-2025
     timeout-minutes: 120
 
@@ -182,33 +198,21 @@
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.PYTHON_VERSION }}
-      - name: Get run ID of "Nightly-build" workflow
-        id: get-run-id
-        shell: bash
-        run: |
-          OTHER_REPO="uxlfoundation/oneDAL"
-          WF_NAME="Nightly-build"
-          JQ_QUERY='map(select(.event == "workflow_dispatch" or .event == "schedule")) | .[0].databaseId'
-          RUN_ID=`gh run --repo ${OTHER_REPO} list --workflow "${WF_NAME}" --json databaseId,event --status success --jq "${JQ_QUERY}"`
-          echo "Detected latest run id of ${RUN_ID} for workflow ${WF_NAME}"
-          echo "run-id=${RUN_ID}" >> "$GITHUB_OUTPUT"
-        env:
-          GH_TOKEN: ${{ github.token }}
       - name: Download oneDAL build artifact
         uses: actions/download-artifact@v4
         with:
           name: __release_win
           github-token: ${{ github.token }}
-          repository: uxlfoundation/oneDAL
-          run-id: ${{ steps.get-run-id.outputs.run-id }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
           path: ./__release_win
       - name: Download Intel BaseKit artifact
         uses: actions/download-artifact@v4
         with:
           name: intel_oneapi_basekit
           github-token: ${{ github.token }}
-          repository: uxlfoundation/oneDAL
-          run-id: ${{ steps.get-run-id.outputs.run-id }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
       - name: Decompress Intel BaseKit
         shell: cmd
         run: |
@@ -234,8 +238,8 @@
         with:
           name: opencl_rt_installer
           github-token: ${{ github.token }}
-          repository: uxlfoundation/oneDAL
-          run-id: ${{ steps.get-run-id.outputs.run-id }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
           path: .
       - name: Install Intel OpenCL CPU Runtime
         if: ${{ steps.set-env.outputs.DPCFLAG == '' }}
@@ -313,3 +317,182 @@
         if "${{ steps.set-env.outputs.DPCFLAG }}"=="" set CPU=cpu
         set SKLEARNEX_PREVIEW=YES
         bash .ci/scripts/run_sklearn_tests.sh %CPU%
+
+  build_uxl:
+    if: github.repository == 'uxlfoundation/scikit-learn-intelex'
+    needs: onedal_nightly
+    name: LinuxNightly build Python${{ needs.onedal_nightly.outputs.uxl-python }}
+    runs-on: uxl-xlarge
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout Scikit-learn-intelex
+        uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.UXL_PYTHONVERSION }}
+          cache: 'pip'
+          cache-dependency-path: |
+            **/dependencies-dev
+            **/requirements-test.txt
+      - name: Download oneDAL build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: __release_lnx
+          github-token: ${{ github.token }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
+          path: ./__release_lnx
+      - name: Download oneDAL environment artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: oneDAL_env
+          github-token: ${{ github.token }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
+          path: .ci/env
+      - name: Set Environment Variables
+        id: set-env
+        run: |
+          # Disable SPMD testing
+          echo "NO_DIST=1" >> "$GITHUB_ENV"
+          # enable coverage report generation
+          echo "SKLEARNEX_GCOV=1" >> "$GITHUB_ENV"
+      - name: apt-get
+        run: sudo apt-get update && sudo apt-get install -y clang-format
+      - name: dpcpp installation
+        run: |
+          # This CI system yields oneAPI dependencies from the oneDAL repository
+          bash .ci/env/apt.sh dpcpp
+      - name: describe system
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          bash .ci/scripts/describe_system.sh
+      - name: Install develop requirements
+        id: install-reqs
+        run: |
+          pip install -r dependencies-dev
+          echo "numpy-version=$(python -m pip freeze | grep numpy)" >> "$GITHUB_OUTPUT"
+          pip list
+      - name: Build daal4py/sklearnex
+        run: |
+          source .github/scripts/activate_components.sh ${{ steps.set-env.outputs.DPCFLAG }}
+          python setup.py bdist_wheel
+      - name: Archive sklearnex build
+        uses: actions/upload-artifact@v4
+        with:
+          name: sklearnex_build_${{ env.UXL_PYTHONVERSION }}
+          path: |
+            ./dist/*.whl
+
+    outputs:
+      numpy-version: ${{ steps.install-reqs.outputs.numpy-version }}
+
+  test_uxl:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - OS: uxl-gpu-xlarge
+            FRAMEWORKS: "pytorch,numpy"
+            DEVICE: gpu
+          - OS: uxl-xlarge
+            FRAMEWORKS: "pytorch,numpy"
+            DEVICE: cpu
+    needs: [onedal_nightly, build_uxl]
+    name: LinuxNightly [${{ matrix.FRAMEWORKS }}]-${{ matrix.DEVICE }} test Python${{ needs.onedal_nightly.outputs.uxl-python }}_Sklearn${{ needs.onedal_nightly.outputs.uxl-sklearn }}
+    runs-on: ${{ matrix.OS }}
+    timeout-minutes: 120
+    steps:
+      - name: Checkout Scikit-learn-intelex
+        uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.UXL_PYTHONVERSION }}
+          cache-dependency-path: |
+            **/dependencies-dev
+            **/requirements-test.txt
+      - name: Download oneDAL build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: __release_lnx
+          github-token: ${{ github.token }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
+          path: ./__release_lnx
+      - name: Download oneDAL environment artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: oneDAL_env
+          github-token: ${{ github.token }}
+          repository: ${{ env.ONEDAL_REPO }}
+          run-id: ${{ needs.onedal_nightly.outputs.run-id }}
+          path: .ci/env
+      - name: Set Environment Variables
+        id: set-env
+        run: |
+          echo "NO_DIST=1" >> "$GITHUB_ENV"
+          # enable coverage report generation
+          echo "COVERAGE_RCFILE=$(readlink -f .coveragerc)" >> "$GITHUB_ENV"
+          echo "ONEDAL_PYTEST_FRAMEWORKS=${{ matrix.FRAMEWORKS }}" >> "$GITHUB_ENV"
+          # reduce GPU driver/runner related memory issues
+          echo "NEOReadDebugKeys=1" >> "$GITHUB_ENV"
+          echo "EnableRecoverablePageFaults=1" >> "$GITHUB_ENV"
+          echo "GpuFaultCheckThreshold=0" >> "$GITHUB_ENV"
+          # set build numpy version for use in generating code coverage
+          echo "NUMPY_BUILD=${{ needs.build_uxl.outputs.numpy-version }}" >> "$GITHUB_ENV"
+      - name: apt-get
+        run: sudo apt-get update
+      - name: dpcpp installation
+        run: |
+          # This CI system yields oneAPI dependencies from the oneDAL repository
+          bash .ci/env/apt.sh dpcpp
+      - name: describe system
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          bash .ci/scripts/describe_system.sh
+      - name: Install test requirements
+        run: |
+          bash .ci/scripts/setup_sklearn.sh ${{ env.UXL_SKLEARNVERSION }}
+          pip install --upgrade -r requirements-test.txt
+          pip install $(python .ci/scripts/get_compatible_scipy_version.py ${{ env.UXL_SKLEARNVERSION }}) pyyaml
+          pip list
+      - name: Download sklearnex wheel
+        uses: actions/download-artifact@v4
+        with:
+          name: sklearnex_build_${{ env.UXL_PYTHONVERSION }}
+      - name: Install PyTorch
+        if: contains(matrix.FRAMEWORKS, 'pytorch')
+        run: |
+          pip install torch --index-url https://download.pytorch.org/whl/xpu
+          python -c "import torch; _=[print(torch.xpu.get_device_name(i)) for i in range(torch.xpu.device_count())]"
+      - name: Install daal4py/sklearnex
+        run: pip install *.whl
+      - name: Sklearnex testing
+        run: |
+          source .github/scripts/activate_components.sh
+          export COVERAGE_FILE=$(pwd)/.coverage.sklearnex
+          cd .ci
+          ../conda-recipe/run_test.sh
+      - name: Sklearn testing
+        run: |
+          source .github/scripts/activate_components.sh
+          export COVERAGE_FILE=$(pwd)/.coverage.sklearn
+          bash .ci/scripts/run_sklearn_tests.sh ${{ matrix.DEVICE }}
+      - name: Create coverage report
+        run: |
+          source .github/scripts/activate_components.sh
+          bash .github/scripts/generate_coverage_reports.sh uxl_lnx_${{ matrix.DEVICE }}
+      - name: Archive coverage report
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage_uxl_lnx_${{ matrix.DEVICE }}
+          path: |
+            *uxl_lnx_${{ matrix.DEVICE }}.info
+      - name: Sklearn testing [preview]
+        run: |
+          source .github/scripts/activate_components.sh
+          export SKLEARNEX_PREVIEW='YES'
+          bash .ci/scripts/run_sklearn_tests.sh ${{ matrix.DEVICE }}
diff --git a/daal4py/sklearn/utils/validation.py b/daal4py/sklearn/utils/validation.py
index 164b046632..b031b059c6 100644
--- a/daal4py/sklearn/utils/validation.py
+++ b/daal4py/sklearn/utils/validation.py
@@ -72,21 +72,20 @@ def _assert_all_finite(
 
     # Data with small size has too big relative overhead
     # TODO: tune threshold size
-    if hasattr(X, "size"):
-        if X.size < 32768:
-            if sklearn_check_version("1.1"):
-                _sklearn_assert_all_finite(
-                    X,
-                    allow_nan=allow_nan,
-                    msg_dtype=msg_dtype,
-                    estimator_name=estimator_name,
-                    input_name=input_name,
-                )
-            else:
-                _sklearn_assert_all_finite(X, allow_nan=allow_nan, msg_dtype=msg_dtype)
-            return
-
     is_df = is_DataFrame(X)
+    if not (is_df or isinstance(X, np.ndarray)) or X.size < 32768:
+        if sklearn_check_version("1.1"):
+            _sklearn_assert_all_finite(
+                X,
+                allow_nan=allow_nan,
+                msg_dtype=msg_dtype,
+                estimator_name=estimator_name,
+                input_name=input_name,
+            )
+        else:
+            _sklearn_assert_all_finite(X, allow_nan=allow_nan, msg_dtype=msg_dtype)
+        return
+
     num_of_types = get_number_of_types(X)
 
     # if X is heterogeneous pandas.DataFrame then
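The rewritten guard routes any input that is neither a DataFrame nor a NumPy array, plus anything below the size threshold, straight to stock scikit-learn, instead of probing only for a `.size` attribute. A minimal illustration of the resulting dispatch follows (threshold and types as in the hunk above; `is_DataFrame` is daal4py's helper, stubbed out here):

```python
# Illustration of the dispatch rule introduced above: only sufficiently
# large NumPy arrays (or DataFrames) stay on the accelerated daal4py path.
import numpy as np


def takes_accelerated_path(X) -> bool:
    # is_DataFrame(X) would also qualify in the real code
    is_supported_type = isinstance(X, np.ndarray)
    return is_supported_type and X.size >= 32768


assert takes_accelerated_path(np.zeros((256, 256)))      # 65536 elements
assert not takes_accelerated_path(np.zeros((100, 100)))  # too small
assert not takes_accelerated_path([[1.0, 2.0]])          # not ndarray/DataFrame
```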
diff --git a/deselected_tests.yaml b/deselected_tests.yaml
index a47986b1aa..7b4e9ddb62 100755
--- a/deselected_tests.yaml
+++ b/deselected_tests.yaml
@@ -40,22 +40,43 @@ deselected_tests:
   - neighbors/tests/test_neighbors.py::test_neighbor_classifiers_loocv[auto-nn_model0]
 
   # Array API support
-  # sklearnex functional Array API support doesn't guaranty namespace consistency for the estimator's array attributes.
+  # sklearnex functional Array API support doesn't guarantee namespace consistency for the estimator's array attributes.
   - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_input_and_values-array_api_strict-None-None]
   - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_input_and_values-array_api_strict-None-None]
   - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_get_precision-array_api_strict-None-None]
   - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_get_precision-array_api_strict-None-None]
   - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_attributes-array_api_strict-None-None]
   - linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='full')-check_array_api_input_and_values-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='full')-check_array_api_input_and_values-torch-cpu-float32]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='full')-check_array_api_get_precision-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='full')-check_array_api_get_precision-torch-cpu-float32]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=0.1,svd_solver='full',whiten=True)-check_array_api_input_and_values-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=0.1,svd_solver='full',whiten=True)-check_array_api_input_and_values-torch-cpu-float32]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=0.1,svd_solver='full',whiten=True)-check_array_api_get_precision-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=0.1,svd_solver='full',whiten=True)-check_array_api_get_precision-torch-cpu-float32]
+  - decomposition/tests/test_pca.py::test_pca_mle_array_api_compliance[PCA(n_components='mle',svd_solver='full')-check_array_api_get_precision-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_mle_array_api_compliance[PCA(n_components='mle',svd_solver='full')-check_array_api_get_precision-torch-cpu-float32]
 
   # `train_test_split` inconsistency for Array API inputs.
   - model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-array_api_strict-None-None]
   - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-array_api_strict-None-None]
   - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-array_api_strict-None-None]
+  - model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-torch-cpu-float64]
+  - model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-torch-cpu-float32]
+  - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-torch-cpu-float64]
+  - model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-torch-cpu-float32]
+  - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-torch-cpu-float64]
+  - model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-torch-cpu-float32]
+
   # PCA. Array API functionally supported for all factorizations. power_iteration_normalizer=["LU", "QR"]
   - decomposition/tests/test_pca.py::test_array_api_error_and_warnings_on_unsupported_params
 
   # PCA. InvalidParameterError: The 'M' parameter of randomized_svd must be an instance of 'numpy.ndarray' or a sparse matrix.
   - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_input_and_values-array_api_strict-None-None]
   - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_get_precision-array_api_strict-None-None]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_input_and_values-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_input_and_values-torch-cpu-float32]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_get_precision-torch-cpu-float64]
+  - decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_get_precision-torch-cpu-float32]
 
   # Ridge regression. Array API functionally supported for all solvers. Not raising error for non-svd solvers.
   - linear_model/tests/test_ridge.py::test_array_api_error_and_warnings_for_solver_parameter[array_api_strict]
@@ -448,3 +469,6 @@ gpu:
 
   # Introduced with RNG forest updates in oneDAL
   - ensemble/tests/test_voting.py::test_set_estimator_drop
+
+  # Deselection for Scikit-Learn 1.4 GPU conformance
+  - model_selection/tests/test_validation.py::test_learning_curve_some_failing_fits_warning >=1.4
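The trailing ` >=1.4` marker scopes the new GPU deselection to scikit-learn releases matching that specifier. A minimal sketch of evaluating such a marker, assuming `packaging`-style specifier semantics (the actual parsing lives in `deselect_tests.py`, which is not part of this diff):

```python
# Hedged sketch: evaluate a trailing version specifier on a deselection
# entry using packaging's SpecifierSet (assumed to match the YAML semantics).
from packaging.specifiers import SpecifierSet


def applies(entry: str, sklearn_version: str) -> bool:
    test_id, _, spec = entry.partition(" ")
    return not spec or sklearn_version in SpecifierSet(spec)


entry = (
    "model_selection/tests/test_validation.py::"
    "test_learning_curve_some_failing_fits_warning >=1.4"
)
assert applies(entry, "1.4.2")      # deselected on sklearn 1.4+
assert not applies(entry, "1.3.2")  # still runs on older sklearn
```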
diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py
index 66a35b2365..18b165bdf9 100644
--- a/onedal/_device_offload.py
+++ b/onedal/_device_offload.py
@@ -22,6 +22,8 @@
 import numpy as np
 from sklearn import get_config
 
+from onedal import _default_backend as backend
+
 from ._config import _get_config
 from .utils import _sycl_queue_manager as QM
 from .utils._array_api import _asarray, _is_numpy_namespace
@@ -35,7 +37,9 @@
 
 SyclQueue = getattr(_dpc_backend, "SyclQueue", None)
 
+
 logger = logging.getLogger("sklearnex")
+cpu_dlpack_device = (backend.kDLCPU, 0)
 
 
 def supports_queue(func):
@@ -106,7 +110,6 @@
     host_data = []
     for item in data:
         usm_iface = getattr(item, "__sycl_usm_array_interface__", None)
-        array_api = getattr(item, "__array_namespace__", lambda: None)()
         if usm_iface is not None:
             if not dpctl_available:
                 raise RuntimeError(
@@ -126,10 +129,29 @@ def _transfer_to_host(*data):
                 order=order,
             )
             has_usm_data = True
-        elif array_api and not _is_numpy_namespace(array_api):
-            # `copy`` param for the `asarray`` is not setted.
-            # The object is copied only if needed.
-            item = np.asarray(item)
+        elif not isinstance(item, np.ndarray) and (
+            device := getattr(item, "__dlpack_device__", None)
+        ):
+            # check dlpack data location.
+            if device() != cpu_dlpack_device:
+                if hasattr(item, "to_device"):
+                    # use of the "cpu" string as device is not officially part of
+                    # the array api standard but is widely supported
+                    item = item.to_device("cpu")
+                elif hasattr(item, "to"):
+                    # pytorch-specific fix as it is not array api compliant
+                    item = item.to("cpu")
+                else:
+                    raise TypeError(f"cannot move {type(item)} to cpu")
+
+            # convert to numpy
+            if hasattr(item, "__array__"):
+                # The `copy` param of `asarray` is not set;
+                # the object is copied only if needed.
+                item = np.asarray(item)
+            else:
+                # requires numpy 1.23
+                item = np.from_dlpack(item)
             has_host_data = True
         else:
             has_host_data = True
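The new branch replaces the `__array_namespace__` probe with a DLPack-based device check: anything reporting a non-CPU `__dlpack_device__` is first moved to host (via the array-API-style `to_device`, or torch's `.to`), then converted to NumPy through `__array__` or, failing that, `np.from_dlpack`. Note that `backend.kDLCPU` is importable at module level because of the `.export_values()` addition in `table.cpp` below. A standalone sketch of the same chain, using torch only as a convenient DLPack producer (torch is an assumption of this example, not of the patched code):

```python
# Standalone sketch of the host-transfer chain above.
import numpy as np
import torch

kDLCPU = 1  # DLPack device-type code for CPU (exposed as backend.kDLCPU)
cpu_dlpack_device = (kDLCPU, 0)


def to_host_numpy(item):
    device = getattr(item, "__dlpack_device__", None)
    if device is not None and device() != cpu_dlpack_device:
        if hasattr(item, "to_device"):
            # array-api style; "cpu" is widely supported but not standard
            item = item.to_device("cpu")
        elif hasattr(item, "to"):
            # pytorch spelling of the same movement
            item = item.to("cpu")
        else:
            raise TypeError(f"cannot move {type(item)} to cpu")
    if hasattr(item, "__array__"):
        # copied only if needed
        return np.asarray(item)
    # requires numpy >= 1.23
    return np.from_dlpack(item)


x = torch.arange(6, dtype=torch.float32)
print(x.__dlpack_device__())  # (1, 0) == (kDLCPU, 0)
print(to_host_numpy(x))       # array([0., 1., 2., 3., 4., 5.], dtype=float32)
```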
diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp
index 12f51dca81..d76171be7b 100644
--- a/onedal/datatypes/table.cpp
+++ b/onedal/datatypes/table.cpp
@@ -110,7 +110,8 @@ ONEDAL_PY_INIT_MODULE(table) {
     m.def("dlpack_memory_order", &dlpack::dlpack_memory_order);
     py::enum_<DLDeviceType>(m, "DLDeviceType")
         .value("kDLCPU", kDLCPU)
-        .value("kDLOneAPI", kDLOneAPI);
+        .value("kDLOneAPI", kDLOneAPI)
+        .export_values();
 }
 
 } // namespace oneapi::dal::python
diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py
index f63f2d687c..95cb0b0e73 100755
--- a/sklearnex/utils/validation.py
+++ b/sklearnex/utils/validation.py
@@ -14,6 +14,7 @@
 #  limitations under the License.
 # ===============================================================================
 
+import math
 import numbers
 
 import scipy.sparse as sp
@@ -70,7 +71,10 @@ def _sklearnex_assert_all_finite(
     # size check is an initial match to daal4py for performance reasons, can be
     # optimized later
     xp, _ = get_namespace(X)
-    if X.size < 32768 or not _onedal_supported_format(X, xp):
+    # PyTorch-specific fix: Tensor.size is a method, not an attribute; math.prod(X.shape) replicates NumPy's .size
+    too_small = math.prod(X.shape) < 32768
+
+    if too_small or not _onedal_supported_format(X, xp):
         if sklearn_check_version("1.1"):
             _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name)
         else:
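The `math.prod(X.shape)` change sidesteps a NumPy/PyTorch API mismatch: `.size` is an integer attribute on NumPy arrays but a method on torch tensors, so the old `X.size < 32768` comparison would not behave for tensors. A quick demonstration (torch used purely for illustration):

```python
# Why math.prod(X.shape) rather than X.size: the expression below works
# for both NumPy arrays and torch tensors, while X.size does not.
import math

import numpy as np
import torch

a = np.zeros((128, 256))
t = torch.zeros(128, 256)

print(a.size)              # 32768 (int attribute)
print(t.size())            # torch.Size([128, 256]) (method, must be called)
print(math.prod(a.shape))  # 32768 - uniform across both libraries
print(math.prod(t.shape))  # 32768
```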