diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index b99b42838a4cb..f1ecd982830a9 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch(); if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() || (DeviceTriple.isSPIR() && - DeviceSubArch != llvm::Triple::SPIRSubArch_fpga)) + DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) || + LangOpts.SYCLIsNativeCPU) Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS"); // Enable generation of USM address spaces for FPGA. if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) { diff --git a/clang/test/Preprocessor/sycl-macro-target-specific.cpp b/clang/test/Preprocessor/sycl-macro-target-specific.cpp index 001df46104560..1d586b366469e 100644 --- a/clang/test/Preprocessor/sycl-macro-target-specific.cpp +++ b/clang/test/Preprocessor/sycl-macro-target-specific.cpp @@ -42,6 +42,8 @@ // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s // RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \ // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple x86_64-unknown-linux-gnu -fsycl-is-native-cpu \ +// RUN: -E -dM | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s // CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS // CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS diff --git a/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h new file mode 100644 index 0000000000000..dbe22c62b56f3 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/FAtomicsNativeCPU.h @@ -0,0 +1,28 @@ +//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass that materializes floating points atomics by emitting +// corresponding atomicrmw instruction. +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ModulePass; + +class FAtomicsNativeCPU : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm diff --git a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt index b1f71ff191544..bbfb74f7a3529 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt +++ b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils RenameKernelSYCLNativeCPU.cpp ConvertToMuxBuiltinsSYCLNativeCPU.cpp FixABIMuxBuiltinsSYCLNativeCPU.cpp + FAtomicsNativeCPU.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR diff --git a/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp new file mode 100644 index 0000000000000..e9043e21ec338 --- /dev/null +++ b/llvm/lib/SYCLNativeCPUUtils/FAtomicsNativeCPU.cpp @@ -0,0 +1,50 @@ +//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass that materializes floating points atomics by emitting +// corresponding atomicrmw instruction. +// +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/AtomicOrdering.h" + +using namespace llvm; + +PreservedAnalyses FAtomicsNativeCPU::run(Module &M, + ModuleAnalysisManager &MAM) { + bool ModuleChanged = false; + auto &Ctx = M.getContext(); + // TODO: add checks for windows mangling + for (auto &F : M) { + AtomicRMWInst::BinOp OpCode; + if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) { + OpCode = AtomicRMWInst::BinOp::FAdd; + } else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) { + OpCode = AtomicRMWInst::BinOp::FMin; + } else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) { + OpCode = AtomicRMWInst::BinOp::FMax; + } else { + continue; + } + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F); + IRBuilder<> Builder(BB); + // Currently we drop arguments 1 and 2 (scope and memory ordering), + // defaulting to Monotonic ordering and System scope. + auto A = + Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(), + AtomicOrdering::Monotonic, SyncScope::System); + Builder.CreateRet(A); + } + return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index 28ca1eb7103d1..b30b6c41c2b99 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" +#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" #include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h" @@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( OptimizationLevel OptLevel) { MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation)); MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass()); + MPM.addPass(FAtomicsNativeCPU()); #ifdef NATIVECPU_USE_OCK MPM.addPass(compiler::utils::PrepareBarriersPass()); MPM.addPass(compiler::utils::TransferKernelMetadataPass()); diff --git a/sycl/test/check_device_code/native_cpu/fp_atomic.cpp b/sycl/test/check_device_code/native_cpu/fp_atomic.cpp new file mode 100644 index 0000000000000..d1abd7ec13f11 --- /dev/null +++ b/sycl/test/check_device_code/native_cpu/fp_atomic.cpp @@ -0,0 +1,41 @@ +// REQUIRES: linux +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -S -emit-llvm -o %t_temp.ll %s +// RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s +#include + +constexpr sycl::memory_order order = sycl::memory_order::relaxed; +constexpr sycl::memory_scope scope = sycl::memory_scope::work_group; +constexpr sycl::access::address_space space = + sycl::access::address_space::global_space; + +class Test; +using namespace sycl; +int main() { + queue q; + const size_t N = 32; + float sum = 0; + std::vector output(N); + std::fill(output.begin(), output.end(), 0.f); + { + buffer sum_buf(&sum, 1); + q.submit([&](handler &cgh) { + auto sum = sum_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(sum[0]); + atm.fetch_add(1.f, order); + //CHECK-DAG: float @_Z21__spirv_AtomicFAddEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]]) + //CHECK: %[[RES:.*]] = atomicrmw fadd ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4 + //CHECK: ret float %[[RES]] + atm.fetch_max(1.f, order); + //CHECK-DAG: float @_Z21__spirv_AtomicFMaxEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]]) + //CHECK: %[[RES:.*]] = atomicrmw fmax ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4 + //CHECK: ret float %[[RES]] + atm.fetch_min(1.f, order); + //CHECK-DAG: float @_Z21__spirv_AtomicFMinEXT{{.*}}(ptr {{.*}} %[[ARG0:.*]], i32 {{.*}}, i32 {{.*}}, float {{.*}} %[[ARG3:.*]]) + //CHECK: %[[RES:.*]] = atomicrmw fmin ptr addrspace(1) %[[ARG0]], float %[[ARG3]] monotonic, align 4 + //CHECK: ret float %[[RES]] + }); + }).wait_and_throw(); + } +}