Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
(DeviceTriple.isSPIR() &&
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a FE test? I assume there is already a FE test checking the existence of this macro for other targets. You can just add a RUN there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added it, thank you

LangOpts.SYCLIsNativeCPU)
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
// Enable generation of USM address spaces for FPGA.
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {
Expand Down
28 changes: 28 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A transformation pass that materializes floating points atomics by emitting
// corresponding atomicrmw instruction.
//
//===----------------------------------------------------------------------===//

#pragma once

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class ModulePass;

class FAtomicsNativeCPU : public PassInfoMixin<FAtomicsNativeCPU> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

} // namespace llvm
1 change: 1 addition & 0 deletions llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
RenameKernelSYCLNativeCPU.cpp
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
FixABIMuxBuiltinsSYCLNativeCPU.cpp
FAtomicsNativeCPU.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Expand Down
49 changes: 49 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A transformation pass that materializes floating points atomics by emitting
// corresponding atomicrmw instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"

using namespace llvm;

PreservedAnalyses FAtomicsNativeCPU::run(Module &M,
ModuleAnalysisManager &MAM) {
bool ModuleChanged = false;
auto &Ctx = M.getContext();
for (auto &F : M) {
AtomicRMWInst::BinOp OpCode;
if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) {
OpCode = AtomicRMWInst::BinOp::FAdd;
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) {
OpCode = AtomicRMWInst::BinOp::FMin;
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) {
OpCode = AtomicRMWInst::BinOp::FMax;
} else {
continue;
}

BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F);
IRBuilder<> Builder(BB);
// Currently we drop arguments 1 and 2 (scope and memory ordering),
// defaulting to Monotonic ordering and System scope.
auto A =
Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(),
AtomicOrdering::Monotonic, SyncScope::System);
Builder.CreateRet(A);
}
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
2 changes: 2 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h"
Expand Down Expand Up @@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
OptimizationLevel OptLevel) {
MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation));
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
MPM.addPass(FAtomicsNativeCPU());
#ifdef NATIVECPU_USE_OCK
MPM.addPass(compiler::utils::PrepareBarriersPass());
MPM.addPass(compiler::utils::TransferKernelMetadataPass());
Expand Down
40 changes: 40 additions & 0 deletions sycl/test/check_device_code/native_cpu/fp_atomic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -S -emit-llvm -o %t_temp.ll %s
// RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s
#include <sycl/sycl.hpp>

constexpr sycl::memory_order order = sycl::memory_order::relaxed;
constexpr sycl::memory_scope scope = sycl::memory_scope::work_group;
constexpr sycl::access::address_space space =
sycl::access::address_space::global_space;

class Test;
using namespace sycl;
int main() {
queue q;
const size_t N = 32;
float sum = 0;
std::vector<float> output(N);
std::fill(output.begin(), output.end(), 0.f);
{
buffer<float> sum_buf(&sum, 1);
q.submit([&](handler &cgh) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<Test>(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<float, order, scope, space>(sum[0]);
atm.fetch_add(1.f, order);
//CHECK-DAG: float @_Z21__spirv_AtomicFAddEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
//CHECK: %[[RES:.*]] = atomicrmw fadd ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
//CHECK: ret float %[[RES]]
atm.fetch_max(1.f, order);
//CHECK-DAG: float @_Z21__spirv_AtomicFMaxEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
//CHECK: %[[RES:.*]] = atomicrmw fmax ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
//CHECK: ret float %[[RES]]
atm.fetch_min(1.f, order);
//CHECK-DAG: float @_Z21__spirv_AtomicFMinEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
//CHECK: %[[RES:.*]] = atomicrmw fmin ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
//CHECK: ret float %[[RES]]
});
}).wait_and_throw();
}
}
Loading