diff --git a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h index 6d33d98b8810a..66b3af72fa044 100644 --- a/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h +++ b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h @@ -19,6 +19,7 @@ extern int __oclc_amdgpu_reflect(__constant char *); OUT_ORDER) \ { \ switch (IN_SCOPE) { \ + case Invocation: \ case Subgroup: \ OUT_SCOPE = __HIP_MEMORY_SCOPE_WAVEFRONT; \ break; \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 19d4dca833fef..0c47a7fb757fe 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ @@ -44,6 +45,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \ TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h index ecffd9e82d2fe..9813c4f729281 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h @@ -18,6 +18,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ @@ -46,6 +47,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ OP, ADDR_SPACE, ADDR_SPACE_NV) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl index 60311a978762d..9b1f708b911ad 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl @@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ TYPE_NV res = __nvvm##ORDER##_cta_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl index b2e23cd76eac2..cc0c17ddec7b4 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl @@ -16,6 +16,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ switch (scope) { \ + case Invocation: \ case Subgroup: \ case Workgroup: { \ __nvvm##ORDER##_cta_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ diff --git a/sycl/test-e2e/AtomicRef/work_item_scope.cpp b/sycl/test-e2e/AtomicRef/work_item_scope.cpp new file mode 100644 index 0000000000000..96e7b267aac83 --- /dev/null +++ b/sycl/test-e2e/AtomicRef/work_item_scope.cpp @@ -0,0 +1,65 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // This test does not validate any output + // Only that the work_item scope does not error + try { + + // Allocate device memory + int *data = sycl::malloc_device(1, q); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(10, [=](sycl::id<> id) { + data[0] = 0; + + // Check atomic_ref functionality + sycl::atomic_ref + at(data[0]); + + auto lock = at.is_lock_free(); + at.store(1); + auto load = at.load(); + auto xch = at.exchange(2); + auto weak = + at.compare_exchange_weak(data[0], 3, sycl::memory_order::relaxed, + sycl::memory_order::relaxed); + auto strong = + at.compare_exchange_strong(data[0], 4, sycl::memory_order::relaxed, + sycl::memory_order::relaxed); + auto fetch_add = at.fetch_add(5); + auto fetch_sub = at.fetch_sub(6); + auto fetch_and = at.fetch_and(7); + auto fetch_or = at.fetch_or(8); + auto fetch_xor = at.fetch_xor(9); + auto fetch_min = at.fetch_min(10); + auto fetch_max = at.fetch_max(11); + }); + }); + q.wait_and_throw(); + + sycl::free(data, q); + + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + std::cout << "Test passed!" << std::endl; + return 0; +}