From 1e09ffcd2d8e3efdb1b49f28a59af71cfde5adc5 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Wed, 20 Nov 2024 15:44:59 +0000 Subject: [PATCH 1/2] [SYCL][libclc][E2E] atomic work_item scope fallback Add Invocation case, that falls back to a coarser grained scope, to libclc atomic functions. This prevents hangs on AMD and crashes on NVIDIA when using atomic_ref functionality with work_item scope. Add a test which simply checks that the kernel does not crash when using atomic_ref with work_item scope. See issue: https://github.com/intel/llvm/issues/16037 --- .../libspirv/atomic/atomic_helpers.h | 1 + .../libspirv/atomic/atomic_cmpxchg.cl | 2 + .../libspirv/atomic/atomic_helpers.h | 2 + .../libspirv/atomic/atomic_load.cl | 1 + .../libspirv/atomic/atomic_store.cl | 1 + sycl/test-e2e/AtomicRef/work_item_scope.cpp | 63 +++++++++++++++++++ 6 files changed, 70 insertions(+) create mode 100644 sycl/test-e2e/AtomicRef/work_item_scope.cpp diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h index 6d33d98b8810a..66b3af72fa044 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h @@ -19,6 +19,7 @@ extern int __oclc_amdgpu_reflect(__constant char *); OUT_ORDER) \ { \ switch (IN_SCOPE) { \ + case Invocation: \ case Subgroup: \ OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \ break; \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 19d4dca833fef..0c47a7fb757fe 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ @@ -44,6 +45,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \ TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h index ecffd9e82d2fe..9813c4f729281 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h @@ -18,6 +18,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ @@ -46,6 +47,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ OP, ADDR_SPACE, ADDR_SPACE_NV) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl index 60311a978762d..9b1f708b911ad 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl @@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ TYPE_NV res = __nvvm##ORDER##_cta_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl index b2e23cd76eac2..cc0c17ddec7b4 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl @@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ __nvvm##ORDER##_cta_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ diff --git a/sycl/test-e2e/AtomicRef/work_item_scope.cpp b/sycl/test-e2e/AtomicRef/work_item_scope.cpp new file mode 100644 index 0000000000000..dd7b8f122e516 --- /dev/null +++ b/sycl/test-e2e/AtomicRef/work_item_scope.cpp @@ -0,0 +1,63 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // This test does not validate any output + // Only that the work_item scope does not error + try { + + // Allocate device memory + int *data = sycl::malloc_device(1, q); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(10, [=](sycl::id<> id) { + data[0] = 0; + + // Check atomic_ref functionality + sycl::atomic_ref + at(data[0]); + + auto lock = at.is_lock_free(); + at.store(1); + auto load = at.load(); + auto xch = at.exchange(2); + auto weak = + at.compare_exchange_weak(data[0], 3, sycl::memory_order::relaxed, + sycl::memory_order::relaxed); + auto strong = + at.compare_exchange_strong(data[0], 4, sycl::memory_order::relaxed, + sycl::memory_order::relaxed); + auto fetch_add = at.fetch_add(5); + auto fetch_sub = at.fetch_sub(6); + auto fetch_and = at.fetch_and(7); + auto fetch_or = at.fetch_or(8); + auto fetch_xor = at.fetch_xor(9); + auto fetch_min = at.fetch_min(10); + auto fetch_max = at.fetch_max(11); + }); + }); + q.wait_and_throw(); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + std::cout << "Test passed!" << std::endl; + return 0; +} From eb65fce176cfe7048b13ca36ef17262944fd9ad7 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Mon, 25 Nov 2024 20:47:27 +0000 Subject: [PATCH 2/2] Add missing free to test --- sycl/test-e2e/AtomicRef/work_item_scope.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/test-e2e/AtomicRef/work_item_scope.cpp b/sycl/test-e2e/AtomicRef/work_item_scope.cpp index dd7b8f122e516..96e7b267aac83 100644 --- a/sycl/test-e2e/AtomicRef/work_item_scope.cpp +++ b/sycl/test-e2e/AtomicRef/work_item_scope.cpp @@ -50,6 +50,8 @@ int main() { }); q.wait_and_throw(); + sycl::free(data, q); + } catch (sycl::exception e) { std::cerr << "SYCL exception caught! : " << e.what() << "\n"; return 1;