From 5191d6841ec1a27317959b17433c042751ba10f4 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 20 Dec 2024 09:31:38 -0800 Subject: [PATCH 1/3] Triage topk radix sort test failure by disabling one-work-group radix kernels --- .../tensor/libtensor/include/kernels/sorting/radix_sort.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp b/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp index dc3da24315..96cd9d86a9 100644 --- a/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp +++ b/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp @@ -1483,7 +1483,10 @@ sycl::event parallel_radix_sort_impl(sycl::queue &exec_q, dev.template get_info(); constexpr std::uint16_t ref_wg_size = 64; - if (n_to_sort <= 16384 && ref_wg_size * 8 <= max_wg_size) { + constexpr bool enable_one_wg_radix_sort = false; + if (enable_one_wg_radix_sort && n_to_sort <= 16384 && + ref_wg_size * 8 <= max_wg_size) + { using _RadixSortKernel = OneWorkGroupRadixSortKernel; if (n_to_sort <= 64 && ref_wg_size <= max_wg_size) { From af56d6c431c9bb1711a709a7c6f7c48094e5054e Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 20 Dec 2024 15:01:45 -0800 Subject: [PATCH 2/3] Wait on radix sort event and write out event --- dpctl/tensor/libtensor/include/kernels/sorting/topk.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/sorting/topk.hpp b/dpctl/tensor/libtensor/include/kernels/sorting/topk.hpp index 2674f877c9..43685f2ab0 100644 --- a/dpctl/tensor/libtensor/include/kernels/sorting/topk.hpp +++ b/dpctl/tensor/libtensor/include/kernels/sorting/topk.hpp @@ -498,10 +498,10 @@ sycl::event topk_radix_impl(sycl::queue &exec_q, exec_q, iter_nelems, axis_nelems, workspace, tmp_tp, proj_op, ascending, {iota_ev}); + radix_sort_ev.wait(); + // Write out top k of the temporary sycl::event write_topk_ev = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(radix_sort_ev); - using KernelName = topk_radix_map_back_krn; cgh.parallel_for(iter_nelems * k, [=](sycl::id<1> id) { @@ -519,9 +519,9 @@ sycl::event topk_radix_impl(sycl::queue &exec_q, }); }); - sycl::event cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(write_topk_ev); + write_topk_ev.wait(); + sycl::event cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { const sycl::context &ctx = exec_q.get_context(); using dpctl::tensor::alloc_utils::sycl_free_noexcept; From f8e7040b3649c992cf4c7560fbb8955203dd0514 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 20 Dec 2024 15:02:29 -0800 Subject: [PATCH 3/3] re-enable one-work-group radix sort kernels --- dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp b/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp index 96cd9d86a9..9e82004dc3 100644 --- a/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp +++ b/dpctl/tensor/libtensor/include/kernels/sorting/radix_sort.hpp @@ -1483,7 +1483,7 @@ sycl::event parallel_radix_sort_impl(sycl::queue &exec_q, dev.template get_info(); constexpr std::uint16_t ref_wg_size = 64; - constexpr bool enable_one_wg_radix_sort = false; + constexpr bool enable_one_wg_radix_sort = true; if (enable_one_wg_radix_sort && n_to_sort <= 16384 && ref_wg_size * 8 <= max_wg_size) {