Merged
2 changes: 1 addition & 1 deletion .github/workflows/build-sphinx.yml
@@ -25,7 +25,7 @@ jobs:
build-and-deploy:
name: Build and Deploy Docs

runs-on: ubuntu-latest
runs-on: ubuntu-22.04

permissions:
# Needed to cancel any previous runs that are not completed for a given workflow
4 changes: 2 additions & 2 deletions .github/workflows/check-mkl-interfaces.yaml
@@ -28,7 +28,7 @@ env:

jobs:
test_by_tag:
name: Run on ['${{ matrix.os }}', python='${{ matrix.python }}'] with oneMKL tag
name: Run tests with oneMKL tag

strategy:
matrix:
@@ -121,7 +121,7 @@ jobs:
SYCL_CACHE_PERSISTENT: 1

test_by_branch:
name: Run on ['${{ matrix.os }}', python='${{ matrix.python }}'] with oneMKL develop branch
name: Run tests with oneMKL develop branch

strategy:
matrix:
35 changes: 26 additions & 9 deletions .github/workflows/conda-package.yml
@@ -25,7 +25,7 @@ env:

jobs:
build:
name: Build ['${{ matrix.os }}', python='${{ matrix.python }}']
name: Build

strategy:
matrix:
@@ -56,6 +56,8 @@ jobs:
fetch-depth: 0

- name: Setup miniconda
id: setup_miniconda
continue-on-error: true
uses: conda-incubator/setup-miniconda@d2e6a045a86077fb6cad6f5adf368e9076ddaa8d # v3.1.0
with:
miniforge-version: latest
@@ -65,9 +67,16 @@
python-version: ${{ env.CONDA_BUILD_INDEX_ENV_PY_VER}}
activate-environment: 'build'

# Sometimes `mamba install ...` fails due to a slow download speed, so disable the check in mamba
- name: Disable speed limit check in mamba
run: echo "MAMBA_NO_LOW_SPEED_LIMIT=1" >> $GITHUB_ENV
- name: ReSetup miniconda
if: steps.setup_miniconda.outcome == 'failure'
uses: conda-incubator/setup-miniconda@d2e6a045a86077fb6cad6f5adf368e9076ddaa8d # v3.1.0
with:
miniforge-version: latest
use-mamba: 'true'
channels: conda-forge
conda-remove-defaults: 'true'
python-version: ${{ env.CONDA_BUILD_INDEX_ENV_PY_VER}}
activate-environment: 'build'

- name: Store conda paths as envs
shell: bash -el {0}
@@ -76,6 +85,12 @@
echo "WHEELS_OUTPUT_FOLDER=$GITHUB_WORKSPACE${{ runner.os == 'Linux' && '/' || '\\' }}" >> $GITHUB_ENV

- name: Install conda-build
id: install_conda_build
continue-on-error: true
run: mamba install conda-build=${{ env.CONDA_BUILD_VERSION}}

- name: ReInstall conda-build
if: steps.install_conda_build.outcome == 'failure'
run: mamba install conda-build=${{ env.CONDA_BUILD_VERSION}}

- name: Build conda package
@@ -96,11 +111,11 @@ jobs:
path: ${{ env.WHEELS_OUTPUT_FOLDER }}${{ env.PACKAGE_NAME }}-*.whl

test_linux:
name: Test ['ubuntu-latest', python='${{ matrix.python }}']
name: Test

needs: build

runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}

defaults:
run:
@@ -109,6 +124,7 @@
strategy:
matrix:
python: ['3.9', '3.10', '3.11', '3.12', '3.13']
os: [ubuntu-latest]

continue-on-error: true

@@ -199,11 +215,11 @@ jobs:
python -m pytest -ra --pyargs ${{ env.PACKAGE_NAME }}.tests

test_windows:
name: Test ['windows-2019', python='${{ matrix.python }}']
name: Test

needs: build

runs-on: windows-2019
runs-on: ${{ matrix.os }}

defaults:
run:
@@ -212,6 +228,7 @@
strategy:
matrix:
python: ['3.9', '3.10', '3.11', '3.12', '3.13']
os: [windows-2019]

continue-on-error: true

@@ -335,7 +352,7 @@ jobs:
python -m pytest -ra --pyargs ${{ env.PACKAGE_NAME }}.tests

upload:
name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}']
name: Upload

needs: [test_linux, test_windows]

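For reference, the retry scheme these conda-package.yml hunks introduce is a two-step pattern: the first attempt gets an `id` and `continue-on-error: true`, and a duplicate step reruns the same command only when the first attempt's `outcome` is `failure`. A minimal sketch of the pattern (step names and the installed package are illustrative, not from this PR):

```yaml
steps:
  - name: Install dependencies
    id: install_deps
    # record the result instead of failing the job on a flaky download
    continue-on-error: true
    run: mamba install some-package

  - name: Retry install on failure
    # `outcome` is the step result before `continue-on-error` is applied,
    # so this fires exactly when the first attempt failed
    if: steps.install_deps.outcome == 'failure'
    run: mamba install some-package
```

In this workflow the pattern replaces the earlier blanket `MAMBA_NO_LOW_SPEED_LIMIT` workaround: instead of globally relaxing mamba's download check, a failed setup or install step is simply attempted once more.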
38 changes: 29 additions & 9 deletions .github/workflows/cron-run-tests.yaml
@@ -15,12 +15,14 @@ env:
PACKAGE_NAME: dpnp
CHANNELS: '-c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels'
TEST_ENV_NAME: test
RERUN_TESTS_ON_FAILURE: 'true'
RUN_TESTS_MAX_ATTEMPTS: 2

jobs:
test:
name: Test ['${{ matrix.runner }}', python='${{ matrix.python }}']
name: Test

# disable the scheduled workflow from running in forks
if: github.event.repository.fork == false

runs-on: ${{ matrix.runner }}

@@ -33,12 +35,11 @@ jobs:
actions: write

strategy:
fail-fast: false
matrix:
python: ['3.9', '3.10', '3.11', '3.12', '3.13']
runner: [ubuntu-22.04, ubuntu-24.04, windows-2019]

continue-on-error: false

steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
@@ -57,6 +58,19 @@
echo "Latest tag is ${{ steps.find_latest_tag.outputs.tag }}"

- name: Setup miniconda
id: setup_miniconda
continue-on-error: true
uses: conda-incubator/setup-miniconda@d2e6a045a86077fb6cad6f5adf368e9076ddaa8d # v3.1.0
with:
miniforge-version: latest
use-mamba: 'true'
channels: conda-forge
conda-remove-defaults: 'true'
python-version: ${{ matrix.python }}
activate-environment: ${{ env.TEST_ENV_NAME }}

- name: ReSetup miniconda
if: steps.setup_miniconda.outcome == 'failure'
uses: conda-incubator/setup-miniconda@d2e6a045a86077fb6cad6f5adf368e9076ddaa8d # v3.1.0
with:
miniforge-version: latest
@@ -67,10 +81,15 @@
activate-environment: ${{ env.TEST_ENV_NAME }}

- name: Install dpnp
id: install_dpnp
continue-on-error: true
run: |
mamba install ${{ env.PACKAGE_NAME }}=${{ steps.find_latest_tag.outputs.tag }} pytest ${{ env.CHANNELS }}

- name: ReInstall dpnp
if: steps.install_dpnp.outcome == 'failure'
run: |
mamba install ${{ env.PACKAGE_NAME }}=${{ steps.find_latest_tag.outputs.tag }} pytest ${{ env.CHANNELS }}
env:
MAMBA_NO_LOW_SPEED_LIMIT: 1

- name: List installed packages
run: mamba list
@@ -95,14 +114,15 @@
python -c "import dpnp; print(dpnp.__version__)"

- name: Run tests
if: env.RERUN_TESTS_ON_FAILURE != 'true'
id: run_tests
continue-on-error: true
run: |
python -m pytest -ra --pyargs ${{ env.PACKAGE_NAME }}.tests
env:
SYCL_CACHE_PERSISTENT: 1

- name: ReRun tests on Linux
if: env.RERUN_TESTS_ON_FAILURE == 'true' && matrix.runner != 'windows-2019'
if: steps.run_tests.outcome == 'failure' && matrix.runner != 'windows-2019'
id: run_tests_linux
uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
with:
@@ -119,7 +139,7 @@
SYCL_CACHE_PERSISTENT: 1

- name: ReRun tests on Windows
if: env.RERUN_TESTS_ON_FAILURE == 'true' && matrix.runner == 'windows-2019'
if: steps.run_tests.outcome == 'failure' && matrix.runner == 'windows-2019'
id: run_tests_win
uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
with:
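The cron workflow applies the same outcome gating to the test run itself, retiring the static `RERUN_TESTS_ON_FAILURE` switch: the first pytest invocation records its result, and the retry action reruns only on a real failure. A condensed sketch, assuming the `RUN_TESTS_MAX_ATTEMPTS` variable added above (package path and timeout are illustrative):

```yaml
steps:
  - name: Run tests
    id: run_tests
    # let the job continue so the rerun step can inspect the outcome
    continue-on-error: true
    run: python -m pytest -ra --pyargs dpnp.tests

  - name: ReRun tests on failure
    if: steps.run_tests.outcome == 'failure'
    uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
    with:
      timeout_minutes: 30
      max_attempts: ${{ env.RUN_TESTS_MAX_ATTEMPTS }}
      command: python -m pytest -ra --pyargs dpnp.tests
```

The real workflow splits this rerun into separate Linux and Windows steps, presumably because the retry action's command needs different shell and environment handling on each runner.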
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
@@ -9,7 +9,7 @@ permissions: read-all

jobs:
pre-commit:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Set up clang-format
run: |
@@ -218,28 +218,21 @@ std::pair<sycl::event, sycl::event>
std::vector<sycl::event> host_tasks{};
host_tasks.reserve(2);

const auto &ptr_size_event_triple_ = device_allocate_and_pack<py::ssize_t>(
auto ptr_size_event_triple_ = device_allocate_and_pack<py::ssize_t>(
q, host_tasks, simplified_shape, simplified_src_strides,
simplified_dst_strides);
py::ssize_t *shape_strides = std::get<0>(ptr_size_event_triple_);
const sycl::event &copy_shape_ev = std::get<2>(ptr_size_event_triple_);

if (shape_strides == nullptr) {
throw std::runtime_error("Device memory allocation failed");
}
auto shape_strides_owner = std::move(std::get<0>(ptr_size_event_triple_));
const auto &copy_shape_ev = std::get<2>(ptr_size_event_triple_);
const py::ssize_t *shape_strides = shape_strides_owner.get();

sycl::event strided_fn_ev =
strided_fn(q, src_nelems, nd, shape_strides, src_data, src_offset,
dst_data, dst_offset, depends, {copy_shape_ev});

// async free of shape_strides temporary
auto ctx = q.get_context();
sycl::event tmp_cleanup_ev = q.submit([&](sycl::handler &cgh) {
cgh.depends_on(strided_fn_ev);
using dpctl::tensor::alloc_utils::sycl_free_noexcept;
cgh.host_task(
[ctx, shape_strides]() { sycl_free_noexcept(shape_strides, ctx); });
});
sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
q, {strided_fn_ev}, shape_strides_owner);

host_tasks.push_back(tmp_cleanup_ev);

return std::make_pair(
@@ -543,30 +536,21 @@ std::pair<sycl::event, sycl::event> py_binary_ufunc(
}

using dpctl::tensor::offset_utils::device_allocate_and_pack;
const auto &ptr_sz_event_triple_ = device_allocate_and_pack<py::ssize_t>(
auto ptr_sz_event_triple_ = device_allocate_and_pack<py::ssize_t>(
exec_q, host_tasks, simplified_shape, simplified_src1_strides,
simplified_src2_strides, simplified_dst_strides);
auto shape_strides_owner = std::move(std::get<0>(ptr_sz_event_triple_));
auto &copy_shape_ev = std::get<2>(ptr_sz_event_triple_);

py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_);
const sycl::event &copy_shape_ev = std::get<2>(ptr_sz_event_triple_);

if (shape_strides == nullptr) {
throw std::runtime_error("Unable to allocate device memory");
}
const py::ssize_t *shape_strides = shape_strides_owner.get();

sycl::event strided_fn_ev = strided_fn(
exec_q, src_nelems, nd, shape_strides, src1_data, src1_offset,
src2_data, src2_offset, dst_data, dst_offset, depends, {copy_shape_ev});

// async free of shape_strides temporary
auto ctx = exec_q.get_context();

sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(strided_fn_ev);
using dpctl::tensor::alloc_utils::sycl_free_noexcept;
cgh.host_task(
[ctx, shape_strides]() { sycl_free_noexcept(shape_strides, ctx); });
});
sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
exec_q, {strided_fn_ev}, shape_strides_owner);

host_tasks.push_back(tmp_cleanup_ev);

@@ -796,30 +780,21 @@ std::pair<sycl::event, sycl::event>
}

using dpctl::tensor::offset_utils::device_allocate_and_pack;
const auto &ptr_sz_event_triple_ = device_allocate_and_pack<py::ssize_t>(
auto ptr_sz_event_triple_ = device_allocate_and_pack<py::ssize_t>(
exec_q, host_tasks, simplified_shape, simplified_rhs_strides,
simplified_lhs_strides);
auto shape_strides_owner = std::move(std::get<0>(ptr_sz_event_triple_));
auto copy_shape_ev = std::get<2>(ptr_sz_event_triple_);

py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_);
const sycl::event &copy_shape_ev = std::get<2>(ptr_sz_event_triple_);

if (shape_strides == nullptr) {
throw std::runtime_error("Unable to allocate device memory");
}
const py::ssize_t *shape_strides = shape_strides_owner.get();

sycl::event strided_fn_ev =
strided_fn(exec_q, rhs_nelems, nd, shape_strides, rhs_data, rhs_offset,
lhs_data, lhs_offset, depends, {copy_shape_ev});

// async free of shape_strides temporary
auto ctx = exec_q.get_context();

sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(strided_fn_ev);
using dpctl::tensor::alloc_utils::sycl_free_noexcept;
cgh.host_task(
[ctx, shape_strides]() { sycl_free_noexcept(shape_strides, ctx); });
});
sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
exec_q, {strided_fn_ev}, shape_strides_owner);

host_tasks.push_back(tmp_cleanup_ev);

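On the C++ side, all three hunks make the same substitution: the hand-rolled cleanup submission (capture the raw `shape_strides` pointer and the queue's context, then call `sycl_free_noexcept` in a host task) gives way to `dpctl::tensor::alloc_utils::async_smart_free`, which takes ownership of the packed shape/strides buffer through the smart pointer returned by `device_allocate_and_pack`. The explicit nullptr checks disappear as well, presumably because the allocating helper now signals failure by throwing rather than returning null. A self-contained sketch of the underlying pattern in plain SYCL (the names `usm_deleter`, `usm_unique_ptr`, and `async_smart_free_sketch` are illustrative, not dpctl's API):

```cpp
#include <memory>
#include <vector>

#include <sycl/sycl.hpp>

// Frees USM memory against a captured context and swallows errors,
// mirroring the role sycl_free_noexcept played in the removed code.
struct usm_deleter
{
    sycl::context ctx;
    void operator()(void *ptr) const noexcept
    {
        try {
            sycl::free(ptr, ctx);
        } catch (...) {
            // never propagate from a deleter
        }
    }
};

template <typename T>
using usm_unique_ptr = std::unique_ptr<T, usm_deleter>;

// Submits a host task that frees the allocation once `deps` complete,
// then releases ownership from the caller so no double free is possible.
template <typename T>
sycl::event async_smart_free_sketch(sycl::queue &q,
                                    const std::vector<sycl::event> &deps,
                                    usm_unique_ptr<T> &owner)
{
    void *raw = owner.get();
    const sycl::context ctx = q.get_context();

    sycl::event ev = q.submit([&](sycl::handler &cgh) {
        cgh.depends_on(deps);
        cgh.host_task([raw, ctx]() { usm_deleter{ctx}(raw); });
    });

    owner.release(); // the host task is now responsible for the memory
    return ev;
}
```

This mirrors the call sites above: the owner produced by `device_allocate_and_pack` is handed to the free routine together with the event of the strided kernel that still reads the buffer, so the temporary is reclaimed without a blocking `wait()`.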