Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
(DeviceTriple.isSPIR() &&
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a FE test? I assume there is already a FE test checking the existence of this macro for other targets. You can just add a RUN there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added it, thank you

LangOpts.SYCLIsNativeCPU)
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
// Enable generation of USM address spaces for FPGA.
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Preprocessor/sycl-macro-target-specific.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple x86_64-unknown-linux-gnu -fsycl-is-native-cpu \
// RUN: -E -dM | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS

Expand Down
28 changes: 28 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A transformation pass that materializes floating points atomics by emitting
// corresponding atomicrmw instruction.
//
//===----------------------------------------------------------------------===//

#pragma once

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class ModulePass;

class FAtomicsNativeCPU : public PassInfoMixin<FAtomicsNativeCPU> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

} // namespace llvm
1 change: 1 addition & 0 deletions llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
RenameKernelSYCLNativeCPU.cpp
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
FixABIMuxBuiltinsSYCLNativeCPU.cpp
FAtomicsNativeCPU.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Expand Down
50 changes: 50 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A transformation pass that materializes floating points atomics by emitting
// corresponding atomicrmw instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"

using namespace llvm;

PreservedAnalyses FAtomicsNativeCPU::run(Module &M,
ModuleAnalysisManager &MAM) {
bool ModuleChanged = false;
auto &Ctx = M.getContext();
// TODO: add checks for windows mangling
for (auto &F : M) {
AtomicRMWInst::BinOp OpCode;
if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) {
OpCode = AtomicRMWInst::BinOp::FAdd;
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) {
OpCode = AtomicRMWInst::BinOp::FMin;
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) {
OpCode = AtomicRMWInst::BinOp::FMax;
} else {
continue;
}

BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F);
IRBuilder<> Builder(BB);
// Currently we drop arguments 1 and 2 (scope and memory ordering),
// defaulting to Monotonic ordering and System scope.
auto A =
Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(),
AtomicOrdering::Monotonic, SyncScope::System);
Builder.CreateRet(A);
}
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
2 changes: 2 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h"
Expand Down Expand Up @@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
OptimizationLevel OptLevel) {
MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation));
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
MPM.addPass(FAtomicsNativeCPU());
#ifdef NATIVECPU_USE_OCK
MPM.addPass(compiler::utils::PrepareBarriersPass());
MPM.addPass(compiler::utils::TransferKernelMetadataPass());
Expand Down
41 changes: 41 additions & 0 deletions sycl/test/check_device_code/native_cpu/fp_atomic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// REQUIRES: linux
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -S -emit-llvm -o %t_temp.ll %s
// RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s
#include <sycl/sycl.hpp>

constexpr sycl::memory_order order = sycl::memory_order::relaxed;
constexpr sycl::memory_scope scope = sycl::memory_scope::work_group;
constexpr sycl::access::address_space space =
sycl::access::address_space::global_space;

class Test;
using namespace sycl;
int main() {
queue q;
const size_t N = 32;
float sum = 0;
std::vector<float> output(N);
std::fill(output.begin(), output.end(), 0.f);
{
buffer<float> sum_buf(&sum, 1);
q.submit([&](handler &cgh) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for<Test>(range<1>(N), [=](item<1> it) {
int gid = it.get_id(0);
auto atm = atomic_ref<float, order, scope, space>(sum[0]);
atm.fetch_add(1.f, order);
//CHECK-DAG: float @_Z21__spirv_AtomicFAddEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
//CHECK: %[[RES:.*]] = atomicrmw fadd ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
//CHECK: ret float %[[RES]]
atm.fetch_max(1.f, order);
//CHECK-DAG: float @_Z21__spirv_AtomicFMaxEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
//CHECK: %[[RES:.*]] = atomicrmw fmax ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
//CHECK: ret float %[[RES]]
atm.fetch_min(1.f, order);
//CHECK-DAG: float @_Z21__spirv_AtomicFMinEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]])
//CHECK: %[[RES:.*]] = atomicrmw fmin ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4
//CHECK: ret float %[[RES]]
});
}).wait_and_throw();
}
}
Loading