Skip to content

Commit 6dd4984

Browse files
authored
[SYCL][NATIVECPU] Materialize floating point atomics in LLVM pass (#15888)
This PR sets `SYCL_USE_NATIVE_FP_ATOMICS` when compiling for Native CPU, and provides an implementation for said atomics via an LLVM pass that defines them through `atomicrmw` instructions.
1 parent 7c44a8f commit 6dd4984

File tree

7 files changed

+126
-1
lines changed

7 files changed

+126
-1
lines changed

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
15131513
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
15141514
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
15151515
(DeviceTriple.isSPIR() &&
1516-
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
1516+
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) ||
1517+
LangOpts.SYCLIsNativeCPU)
15171518
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
15181519
// Enable generation of USM address spaces for FPGA.
15191520
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {

clang/test/Preprocessor/sycl-macro-target-specific.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
4343
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
4444
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
45+
// RUN: %clang_cc1 %s -fsycl-is-device -triple x86_64-unknown-linux-gnu -fsycl-is-native-cpu \
46+
// RUN: -E -dM | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
4547
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
4648
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
4749

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that materializes floating points atomics by emitting
10+
// corresponding atomicrmw instruction.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#pragma once
15+
16+
#include "llvm/IR/Module.h"
17+
#include "llvm/IR/PassManager.h"
18+
19+
namespace llvm {
20+
21+
class ModulePass;
22+
23+
class FAtomicsNativeCPU : public PassInfoMixin<FAtomicsNativeCPU> {
24+
public:
25+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
26+
};
27+
28+
} // namespace llvm

llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
44
RenameKernelSYCLNativeCPU.cpp
55
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
66
FixABIMuxBuiltinsSYCLNativeCPU.cpp
7+
FAtomicsNativeCPU.cpp
78

89
ADDITIONAL_HEADER_DIRS
910
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that materializes floating points atomics by emitting
10+
// corresponding atomicrmw instruction.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
15+
#include "llvm/IR/IRBuilder.h"
16+
#include "llvm/IR/Instructions.h"
17+
#include "llvm/IR/LLVMContext.h"
18+
#include "llvm/Support/Alignment.h"
19+
#include "llvm/Support/AtomicOrdering.h"
20+
21+
using namespace llvm;
22+
23+
PreservedAnalyses FAtomicsNativeCPU::run(Module &M,
24+
ModuleAnalysisManager &MAM) {
25+
bool ModuleChanged = false;
26+
auto &Ctx = M.getContext();
27+
// TODO: add checks for windows mangling
28+
for (auto &F : M) {
29+
AtomicRMWInst::BinOp OpCode;
30+
if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) {
31+
OpCode = AtomicRMWInst::BinOp::FAdd;
32+
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) {
33+
OpCode = AtomicRMWInst::BinOp::FMin;
34+
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) {
35+
OpCode = AtomicRMWInst::BinOp::FMax;
36+
} else {
37+
continue;
38+
}
39+
40+
BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F);
41+
IRBuilder<> Builder(BB);
42+
// Currently we drop arguments 1 and 2 (scope and memory ordering),
43+
// defaulting to Monotonic ordering and System scope.
44+
auto A =
45+
Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(),
46+
AtomicOrdering::Monotonic, SyncScope::System);
47+
Builder.CreateRet(A);
48+
}
49+
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
50+
}

llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//
1313
//===----------------------------------------------------------------------===//
1414
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
15+
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
1516
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
1617
#include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h"
1718
#include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h"
@@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
7071
OptimizationLevel OptLevel) {
7172
MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation));
7273
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
74+
MPM.addPass(FAtomicsNativeCPU());
7375
#ifdef NATIVECPU_USE_OCK
7476
MPM.addPass(compiler::utils::PrepareBarriersPass());
7577
MPM.addPass(compiler::utils::TransferKernelMetadataPass());
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// REQUIRES: linux
2+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -S -emit-llvm -o %t_temp.ll %s
3+
// RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s
4+
#include <sycl/sycl.hpp>
5+
6+
constexpr sycl::memory_order order = sycl::memory_order::relaxed;
7+
constexpr sycl::memory_scope scope = sycl::memory_scope::work_group;
8+
constexpr sycl::access::address_space space =
9+
sycl::access::address_space::global_space;
10+
11+
class Test;
12+
using namespace sycl;
13+
int main() {
14+
queue q;
15+
const size_t N = 32;
16+
float sum = 0;
17+
std::vector<float> output(N);
18+
std::fill(output.begin(), output.end(), 0.f);
19+
{
20+
buffer<float> sum_buf(&sum, 1);
21+
q.submit([&](handler &cgh) {
22+
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
23+
cgh.parallel_for<Test>(range<1>(N), [=](item<1> it) {
24+
int gid = it.get_id(0);
25+
auto atm = atomic_ref<float, order, scope, space>(sum[0]);
26+
atm.fetch_add(1.f, order);
27+
//CHECK-DAG: float @_Z21__spirv_AtomicFAddEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
28+
//CHECK: %[[RES:.*]] = atomicrmw fadd ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
29+
//CHECK: ret float %[[RES]]
30+
atm.fetch_max(1.f, order);
31+
//CHECK-DAG: float @_Z21__spirv_AtomicFMaxEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
32+
//CHECK: %[[RES:.*]] = atomicrmw fmax ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
33+
//CHECK: ret float %[[RES]]
34+
atm.fetch_min(1.f, order);
35+
//CHECK-DAG: float @_Z21__spirv_AtomicFMinEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
36+
//CHECK: %[[RES:.*]] = atomicrmw fmin ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
37+
//CHECK: ret float %[[RES]]
38+
});
39+
}).wait_and_throw();
40+
}
41+
}

0 commit comments

Comments
 (0)