From 237ab908db851f9d68516aaf7ef0196b9848bca1 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Tue, 10 Dec 2024 21:38:44 +0000 Subject: [PATCH 1/2] fix work_group_scrach_memory Signed-off-by: Victor Lomuller --- llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp | 1 + .../WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp | 6 +++--- .../WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp | 6 +++--- sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp b/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp index bb17dc3bc69cd..6c46b5c75d5d7 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp @@ -205,6 +205,7 @@ static bool dynamicWGLocalMemory(Module &M) { GlobalVariable::NotThreadLocal, // ThreadLocalMode LocalAS // AddressSpace ); + LocalMemArrayGV->setUnnamedAddr(GlobalVariable::UnnamedAddr::Local); constexpr int DefaultMaxAlignment = 128; if (!TT.isSPIROrSPIRV()) LocalMemArrayGV->setAlignment(Align{DefaultMaxAlignment}); diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp index 20d5d1ca917ad..04d8a85a808ff 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // -// UNSUPPORTED: gpu-intel-gen12, cpu +// UNSUPPORTED: gpu-intel-gen12 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072 // Test work_group_dynamic extension with allocation size specified at runtime @@ -35,8 +35,8 @@ int main() { sycl_ext::properties properties{static_size}; auto LocalAccessor = sycl::local_accessor(WgSize * RepeatWG * sizeof(int), Cgh); - Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties, - [=](nd_item<1> Item) { + Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)), + properties, [=](nd_item<1> Item) { int *Ptr = reinterpret_cast( sycl_ext::get_work_group_scratch_memory()); size_t GroupOffset = diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp index fdec5db02657d..46346d5f2ee85 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // -// UNSUPPORTED: gpu-intel-gen12, cpu +// UNSUPPORTED: gpu-intel-gen12 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072 // Test work_group_dynamic extension with allocation size specified at runtime @@ -33,8 +33,8 @@ int main() { sycl_ext::work_group_scratch_size static_size(WgSize * RepeatWG * sizeof(int)); sycl_ext::properties properties{static_size}; - Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties, - [=](nd_item<1> Item) { + Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)), + properties, [=](nd_item<1> Item) { int *Ptr = reinterpret_cast( sycl_ext::get_work_group_scratch_memory()); size_t GroupOffset = diff --git a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp index 14848f1477e3a..224bf2607f772 100644 --- a/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp +++ b/sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // -// UNSUPPORTED: gpu-intel-gen12, cpu +// UNSUPPORTED: gpu-intel-gen12 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072 // Test work_group_dynamic extension with allocation size specified at runtime. @@ -32,8 +32,8 @@ int main() { sycl_ext::work_group_scratch_size static_size(WgSize * RepeatWG * sizeof(int)); sycl_ext::properties properties{static_size}; - Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties, - [=](nd_item<1> Item) { + Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)), + properties, [=](nd_item<1> Item) { int *Ptr = reinterpret_cast( sycl_ext::get_work_group_scratch_memory()); size_t GroupOffset = From c85651cff1c00d89288a8a53318d95fc9c0997f9 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Wed, 11 Dec 2024 10:55:22 +0000 Subject: [PATCH 2/2] update llvm test after small optimisation --- llvm/test/SYCLLowerIR/work_group_static.ll | 2 +- llvm/test/SYCLLowerIR/work_group_static_nv.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/SYCLLowerIR/work_group_static.ll b/llvm/test/SYCLLowerIR/work_group_static.ll index 105bb270f3450..25ce1e470d3db 100644 --- a/llvm/test/SYCLLowerIR/work_group_static.ll +++ b/llvm/test/SYCLLowerIR/work_group_static.ll @@ -6,7 +6,7 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr addrspace(3) global ptr addrspace(3) undef +; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr local_unnamed_addr addrspace(3) global ptr addrspace(3) undef ; Function Attrs: convergent norecurse ; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0, ptr addrspace(3) noalias "sycl-implicit-local-arg" %[[IMPLICT_ARG:[a-zA-Z0-9]+]]{{.*}} !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]] diff --git a/llvm/test/SYCLLowerIR/work_group_static_nv.ll b/llvm/test/SYCLLowerIR/work_group_static_nv.ll index cc957e45ea0a8..6388265ae19d5 100644 --- a/llvm/test/SYCLLowerIR/work_group_static_nv.ll +++ b/llvm/test/SYCLLowerIR/work_group_static_nv.ll @@ -5,7 +5,7 @@ target triple = "nvptx64-nvidia-cuda" -; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external addrspace(3) global [0 x i8], align 128 +; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external local_unnamed_addr addrspace(3) global [0 x i8], align 128 ; Function Attrs: convergent norecurse ; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0)