From b78605beb1cd0440d4012354d8cf8e5c04f0a0e3 Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Wed, 10 Sep 2025 04:40:20 -0700 Subject: [PATCH 1/7] [SYCL] Don't run SYCLOptimizeBarriersPass with O0 Signed-off-by: Sidorov, Dmitry --- clang/lib/CodeGen/BackendUtil.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 51d153413a582..e6e070acbb44e 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1093,8 +1093,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline( ThinOrFullLTOPhase) { MPM.addPass(createModuleToFunctionPassAdaptor( InferAddressSpacesPass(clang::targets::SPIR_GENERIC_AS))); - MPM.addPass( - createModuleToFunctionPassAdaptor(SYCLOptimizeBarriersPass())); + if (Level != OptimizationLevel::O0) + MPM.addPass( + createModuleToFunctionPassAdaptor( + SYCLOptimizeBarriersPass())); }); } From 96a7a7ac85ab111410c9d735988ff052469f8c4c Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Wed, 10 Sep 2025 05:27:03 -0700 Subject: [PATCH 2/7] format Signed-off-by: Sidorov, Dmitry --- clang/lib/CodeGen/BackendUtil.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e6e070acbb44e..8bf72cd263931 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1094,9 +1094,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(createModuleToFunctionPassAdaptor( InferAddressSpacesPass(clang::targets::SPIR_GENERIC_AS))); if (Level != OptimizationLevel::O0) - MPM.addPass( - createModuleToFunctionPassAdaptor( - SYCLOptimizeBarriersPass())); + MPM.addPass(createModuleToFunctionPassAdaptor( + SYCLOptimizeBarriersPass())); }); } From 1aeedf7e9db055dca68234513a363efa842203d9 Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Wed, 10 Sep 2025 10:09:02 -0700 Subject: [PATCH 3/7] add test --- .../narrow-barrier-explicit-spirv-call.cpp | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp diff --git a/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp b/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp new file mode 100644 index 0000000000000..04d4e5a364fae --- /dev/null +++ b/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp @@ -0,0 +1,65 @@ +// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -S -Xclang -emit-llvm -Xclang -no-enable-noundef-analysis -O2 %s -o - | FileCheck %s --check-prefix=CHECK-O2 +// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -S -Xclang -emit-llvm -Xclang -no-enable-noundef-analysis -O0 %s -o - | FileCheck %s --check-prefix=CHECK-O0 + +// The test checks if SYCLOptimizeBarriers pass can perform barrier scope +// narrowing in case if there are no fenced global accesses, where a barrier +// is an explicit SPIR-V friendly call. + +// CHECK-O2: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}912) +// CHECK-O2: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}400) + +// CHECK-O0: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}912) +// CHECK-O0: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}912) + +#include + +constexpr size_t WORK_GROUP_SIZE = 1024; +constexpr size_t NUMBER_OF_WORK_GROUPS = 64; +constexpr size_t NUMBER_OF_ITERATIONS = 100; + +struct GroupBarrierKernel { + + GroupBarrierKernel(sycl::handler &h, float *sum) + : sum(sum), local(WORK_GROUP_SIZE, h) {} + + void operator()(sycl::nd_item<1> it) const { + + const size_t item_id = it.get_local_id()[0]; + const size_t item_range = it.get_local_range()[0]; + const size_t group_id = it.get_group().get_group_id()[0]; + + for (int i = 0; i < item_id; i += item_range) { + local[i] = i; + } + + sycl::group_barrier(it.get_group()); + for (int offset = 1; offset < item_range; offset *= 2) { + local[item_id] += local[item_id + offset]; + __spirv_ControlBarrier(2, 2, 912); + } + + if (it.get_group().leader()) { + __spirv_ControlBarrier(2, 2, 912); + sum[group_id] = local[0]; + } + } + + float *sum; + sycl::local_accessor local; +}; + +int main(int argc, char *argv[]) { + sycl::queue q{sycl::property::queue::enable_profiling{}}; + float *sum = sycl::malloc_shared(NUMBER_OF_WORK_GROUPS, q); + + double modern_ns = 0; + for (int r = 0; r < NUMBER_OF_ITERATIONS + 1; ++r) { + sycl::event e = q.submit([&](sycl::handler &h) { + auto k = GroupBarrierKernel(h, sum); + h.parallel_for(sycl::nd_range<1>{NUMBER_OF_WORK_GROUPS * WORK_GROUP_SIZE, + WORK_GROUP_SIZE}, + k); + }); + e.wait(); + } +} From 68a84f0b723b5181cb542d76c9a254b985ba70aa Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Wed, 10 Sep 2025 10:13:59 -0700 Subject: [PATCH 4/7] adjust test Signed-off-by: Sidorov, Dmitry --- .../narrow-barrier-explicit-spirv-call.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp b/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp index 04d4e5a364fae..53b476adb9690 100644 --- a/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp +++ b/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp @@ -32,14 +32,17 @@ struct GroupBarrierKernel { local[i] = i; } - sycl::group_barrier(it.get_group()); + __spirv_ControlBarrier(/*WG scope*/ 2, /*WG scope*/ 2, + /*global + local*/ 912); for (int offset = 1; offset < item_range; offset *= 2) { local[item_id] += local[item_id + offset]; - __spirv_ControlBarrier(2, 2, 912); + __spirv_ControlBarrier(/*WG scope*/ 2, /*WG scope*/ 2, + /*global + local*/ 912); } if (it.get_group().leader()) { - __spirv_ControlBarrier(2, 2, 912); + __spirv_ControlBarrier(/*WG scope*/ 2, /*WG scope*/ 2, + /*global + local*/ 912); sum[group_id] = local[0]; } } From 73266d47af038585e63b0535293b8d5aa97c2915 Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Thu, 11 Sep 2025 06:40:46 -0700 Subject: [PATCH 5/7] move the test Signed-off-by: Sidorov, Dmitry --- .../kernel-early-optimization-pipeline.cpp | 5 ++ .../narrow-barrier-explicit-spirv-call.cpp | 68 ------------------- 2 files changed, 5 insertions(+), 68 deletions(-) delete mode 100644 sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp diff --git a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp index c75e48b9727a9..74526f00db911 100644 --- a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp +++ b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp @@ -13,6 +13,7 @@ // CHECK: AlwaysInlinerPass // CHECK: ModuleInlinerWrapperPass // CHECK: ConstantMergePass +// SYCLOptimizeBarriersPass // CHECK: SYCLMutatePrintfAddrspacePass // CHECK: SYCLPropagateAspectsUsagePass // CHECK: SYCLAddOptLevelAttributePass @@ -22,4 +23,8 @@ // // RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -fno-sycl-early-optimizations -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NOEARLYOPT // CHECK-NOEARLYOPT-NOT: ConstantMergePass1 +// CHECK-NOEARLYOPT-NOT: SYCLOptimizeBarriersPass // CHECK-NOEARLYOPT: SYCLMutatePrintfAddrspacePass + +// RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0 +// CHECK-O0-NOT: SYCLOptimizeBarriersPass diff --git a/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp b/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp deleted file mode 100644 index 53b476adb9690..0000000000000 --- a/sycl/test/check_device_code/narrow-barrier-explicit-spirv-call.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -S -Xclang -emit-llvm -Xclang -no-enable-noundef-analysis -O2 %s -o - | FileCheck %s --check-prefix=CHECK-O2 -// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -S -Xclang -emit-llvm -Xclang -no-enable-noundef-analysis -O0 %s -o - | FileCheck %s --check-prefix=CHECK-O0 - -// The test checks if SYCLOptimizeBarriers pass can perform barrier scope -// narrowing in case if there are no fenced global accesses, where a barrier -// is an explicit SPIR-V friendly call. - -// CHECK-O2: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}912) -// CHECK-O2: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}400) - -// CHECK-O0: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}912) -// CHECK-O0: call spir_func void @_Z22__spirv_ControlBarrieriii(i32{{.*}}2, i32{{.*}}2, i32{{.*}}912) - -#include - -constexpr size_t WORK_GROUP_SIZE = 1024; -constexpr size_t NUMBER_OF_WORK_GROUPS = 64; -constexpr size_t NUMBER_OF_ITERATIONS = 100; - -struct GroupBarrierKernel { - - GroupBarrierKernel(sycl::handler &h, float *sum) - : sum(sum), local(WORK_GROUP_SIZE, h) {} - - void operator()(sycl::nd_item<1> it) const { - - const size_t item_id = it.get_local_id()[0]; - const size_t item_range = it.get_local_range()[0]; - const size_t group_id = it.get_group().get_group_id()[0]; - - for (int i = 0; i < item_id; i += item_range) { - local[i] = i; - } - - __spirv_ControlBarrier(/*WG scope*/ 2, /*WG scope*/ 2, - /*global + local*/ 912); - for (int offset = 1; offset < item_range; offset *= 2) { - local[item_id] += local[item_id + offset]; - __spirv_ControlBarrier(/*WG scope*/ 2, /*WG scope*/ 2, - /*global + local*/ 912); - } - - if (it.get_group().leader()) { - __spirv_ControlBarrier(/*WG scope*/ 2, /*WG scope*/ 2, - /*global + local*/ 912); - sum[group_id] = local[0]; - } - } - - float *sum; - sycl::local_accessor local; -}; - -int main(int argc, char *argv[]) { - sycl::queue q{sycl::property::queue::enable_profiling{}}; - float *sum = sycl::malloc_shared(NUMBER_OF_WORK_GROUPS, q); - - double modern_ns = 0; - for (int r = 0; r < NUMBER_OF_ITERATIONS + 1; ++r) { - sycl::event e = q.submit([&](sycl::handler &h) { - auto k = GroupBarrierKernel(h, sum); - h.parallel_for(sycl::nd_range<1>{NUMBER_OF_WORK_GROUPS * WORK_GROUP_SIZE, - WORK_GROUP_SIZE}, - k); - }); - e.wait(); - } -} From 71bdb3f2c7fedc2be9a9f7b4b3c888dd7c2bc5d6 Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Tue, 16 Sep 2025 04:21:03 -0700 Subject: [PATCH 6/7] adjust test --- .../kernel-early-optimization-pipeline.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp index 74526f00db911..f69604e888342 100644 --- a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp +++ b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp @@ -12,8 +12,8 @@ // CHECK: InferFunctionAttrsPass // CHECK: AlwaysInlinerPass // CHECK: ModuleInlinerWrapperPass +// CHECK: SYCLOptimizeBarriersPass // CHECK: ConstantMergePass -// SYCLOptimizeBarriersPass // CHECK: SYCLMutatePrintfAddrspacePass // CHECK: SYCLPropagateAspectsUsagePass // CHECK: SYCLAddOptLevelAttributePass @@ -28,3 +28,12 @@ // RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0 // CHECK-O0-NOT: SYCLOptimizeBarriersPass + +template +void kernel(const Func &f) __attribute__((sycl_kernel)) { + f(); +} + +void bar() { + kernel([=]() {}); +} From 3496abb313d9bd63013b8ed863d088a9a5d8c837 Mon Sep 17 00:00:00 2001 From: "Sidorov, Dmitry" Date: Tue, 16 Sep 2025 08:24:17 -0700 Subject: [PATCH 7/7] shorted test --- .../CodeGenSYCL/kernel-early-optimization-pipeline.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp index f69604e888342..d352f1bcca39a 100644 --- a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp +++ b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp @@ -29,11 +29,7 @@ // RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0 // CHECK-O0-NOT: SYCLOptimizeBarriersPass -template -void kernel(const Func &f) __attribute__((sycl_kernel)) { - f(); -} - -void bar() { - kernel([=]() {}); +// Passes registered via registerOptimizerLastEPCallback don't run on empty +// code +__attribute__((sycl_device)) void bar() { }