Skip to content

Commit 656617b

Browse files
committed
[SYCL][NATIVECPU] resolved merge
2 parents 7f1f2b9 + 79b620b commit 656617b

File tree

12 files changed

+227
-31
lines changed

12 files changed

+227
-31
lines changed

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
15131513
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
15141514
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
15151515
(DeviceTriple.isSPIR() &&
1516-
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
1516+
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) ||
1517+
LangOpts.SYCLIsNativeCPU)
15171518
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
15181519
// Enable generation of USM address spaces for FPGA.
15191520
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {

clang/test/Preprocessor/sycl-macro-target-specific.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
4343
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
4444
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
45+
// RUN: %clang_cc1 %s -fsycl-is-device -triple x86_64-unknown-linux-gnu -fsycl-is-native-cpu \
46+
// RUN: -E -dM | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
4547
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
4648
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
4749

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that materializes floating points atomics by emitting
10+
// corresponding atomicrmw instruction.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#pragma once
15+
16+
#include "llvm/IR/Module.h"
17+
#include "llvm/IR/PassManager.h"
18+
19+
namespace llvm {
20+
21+
class ModulePass;
22+
23+
class FAtomicsNativeCPU : public PassInfoMixin<FAtomicsNativeCPU> {
24+
public:
25+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
26+
};
27+
28+
} // namespace llvm

llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
44
RenameKernelSYCLNativeCPU.cpp
55
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
66
FixABIMuxBuiltinsSYCLNativeCPU.cpp
7+
FAtomicsNativeCPU.cpp
78

89
ADDITIONAL_HEADER_DIRS
910
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that materializes floating points atomics by emitting
10+
// corresponding atomicrmw instruction.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
15+
#include "llvm/IR/IRBuilder.h"
16+
#include "llvm/IR/Instructions.h"
17+
#include "llvm/IR/LLVMContext.h"
18+
#include "llvm/Support/Alignment.h"
19+
#include "llvm/Support/AtomicOrdering.h"
20+
21+
using namespace llvm;
22+
23+
PreservedAnalyses FAtomicsNativeCPU::run(Module &M,
24+
ModuleAnalysisManager &MAM) {
25+
bool ModuleChanged = false;
26+
auto &Ctx = M.getContext();
27+
// TODO: add checks for windows mangling
28+
for (auto &F : M) {
29+
AtomicRMWInst::BinOp OpCode;
30+
if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) {
31+
OpCode = AtomicRMWInst::BinOp::FAdd;
32+
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) {
33+
OpCode = AtomicRMWInst::BinOp::FMin;
34+
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) {
35+
OpCode = AtomicRMWInst::BinOp::FMax;
36+
} else {
37+
continue;
38+
}
39+
40+
BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F);
41+
IRBuilder<> Builder(BB);
42+
// Currently we drop arguments 1 and 2 (scope and memory ordering),
43+
// defaulting to Monotonic ordering and System scope.
44+
auto A =
45+
Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(),
46+
AtomicOrdering::Monotonic, SyncScope::System);
47+
Builder.CreateRet(A);
48+
}
49+
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
50+
}

llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//
1313
//===----------------------------------------------------------------------===//
1414
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
15+
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
1516
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
1617
#include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h"
1718
#include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h"
@@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
7071
OptimizationLevel OptLevel) {
7172
MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation));
7273
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
74+
MPM.addPass(FAtomicsNativeCPU());
7375
#ifdef NATIVECPU_USE_OCK
7476
MPM.addPass(compiler::utils::PrepareBarriersPass());
7577
MPM.addPass(compiler::utils::TransferKernelMetadataPass());

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,8 +1450,14 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M,
14501450
const StackSafetyGlobalInfo *const SSGI =
14511451
ClUseStackSafety ? &MAM.getResult<StackSafetyGlobalAnalysis>(M) : nullptr;
14521452

1453-
if (Triple(M.getTargetTriple()).isSPIROrSPIRV())
1453+
if (Triple(M.getTargetTriple()).isSPIROrSPIRV()) {
14541454
ExtendSpirKernelArgs(M, FAM);
1455+
// FIXME: W/A skip instrumentation if this module has ESIMD
1456+
for (auto &F : M) {
1457+
if (F.hasMetadata("sycl_explicit_simd"))
1458+
return PreservedAnalyses::all();
1459+
}
1460+
}
14551461

14561462
for (Function &F : M) {
14571463
AddressSanitizer FunctionSanitizer(
@@ -3497,10 +3503,6 @@ bool AddressSanitizer::instrumentFunction(Function &F,
34973503
// function isn't supported yet in intel-graphics-compiler.
34983504
if (F.hasFnAttribute("referenced-indirectly"))
34993505
return false;
3500-
// FIXME: ESIMD kernel doesn't support noinline functions, so we can't
3501-
// support sanitizer for it
3502-
if (F.hasMetadata("sycl_explicit_simd"))
3503-
return false;
35043506
}
35053507

35063508
bool FunctionModified = false;

llvm/test/Instrumentation/AddressSanitizer/SPIRV/sycl_esimd.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
44
target triple = "spir64-unknown-unknown"
55

6+
;;
7+
;; W/A: We skip asan completely if one module has esimd
8+
;;
9+
610
define spir_kernel void @sycl_kernel(ptr addrspace(1) %p) #0 {
711
; CHECK-LABEL: define spir_kernel void @sycl_kernel(ptr addrspace(1) %p, ptr addrspace(1) %__asan_launch) #0
812
entry:
913
%0 = load i32, ptr addrspace(1) %p, align 4
10-
; CHECK: store ptr addrspace(1) %__asan_launch, ptr addrspace(3) @__AsanLaunchInfo, align 8
11-
; CHECK: call void @__asan_load4
14+
; CHECK-NOT: store ptr addrspace(1) %__asan_launch, ptr addrspace(3) @__AsanLaunchInfo, align 8
15+
; CHECK-NOT: call void @__asan_load4
1216
ret void
1317
}
1418

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// This test checks whether data can be correctly written to and read from
2+
// virtual memory.
3+
4+
// RUN: %{build} -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
#include "helpers.hpp"
8+
9+
int main() {
10+
sycl::queue Q;
11+
sycl::context Context = Q.get_context();
12+
sycl::device Device = Q.get_device();
13+
int Failed = 0;
14+
constexpr size_t NumberOfElements = 1000;
15+
size_t BytesRequired = NumberOfElements * sizeof(int);
16+
17+
size_t UsedGranularity = GetLCMGranularity(Device, Context);
18+
19+
size_t AlignedByteSize =
20+
((BytesRequired + UsedGranularity - 1) / UsedGranularity) *
21+
UsedGranularity;
22+
23+
syclext::physical_mem NewPhysicalMem{Device, Context, AlignedByteSize};
24+
uintptr_t VirtualMemoryPtr =
25+
syclext::reserve_virtual_mem(0, AlignedByteSize, Context);
26+
27+
void *MappedPtr =
28+
NewPhysicalMem.map(VirtualMemoryPtr, AlignedByteSize,
29+
syclext::address_access_mode::read_write);
30+
31+
int *DataPtr = reinterpret_cast<int *>(MappedPtr);
32+
33+
std::vector<int> ResultHostData(NumberOfElements);
34+
35+
constexpr int ExpectedValueAfterFill = 1;
36+
37+
Q.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements).wait_and_throw();
38+
{
39+
sycl::buffer<int> CheckBuffer(ResultHostData);
40+
Q.submit([&](sycl::handler &Handle) {
41+
sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
42+
Handle.parallel_for(NumberOfElements,
43+
[=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
44+
});
45+
}
46+
47+
for (size_t i = 0; i < ResultHostData.size(); i++) {
48+
if (ResultHostData[i] != ExpectedValueAfterFill) {
49+
std::cout << "Comparison failed after fill operation at index " << i
50+
<< ": " << ResultHostData[i] << " != " << ExpectedValueAfterFill
51+
<< std::endl;
52+
++Failed;
53+
}
54+
}
55+
56+
Q.parallel_for(NumberOfElements, [=](sycl::id<1> Idx) {
57+
DataPtr[Idx] = Idx;
58+
}).wait_and_throw();
59+
60+
syclext::set_access_mode(DataPtr, AlignedByteSize,
61+
syclext::address_access_mode::read, Context);
62+
63+
{
64+
sycl::buffer<int> ResultBuffer(ResultHostData);
65+
66+
Q.submit([&](sycl::handler &Handle) {
67+
sycl::accessor A(ResultBuffer, Handle, sycl::write_only);
68+
Handle.parallel_for(NumberOfElements,
69+
[=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
70+
});
71+
}
72+
73+
for (size_t i = 0; i < NumberOfElements; i++) {
74+
const int ExpectedValue = static_cast<int>(i);
75+
if (ResultHostData[i] != ExpectedValue) {
76+
std::cout << "Comparison failed at index " << i << ": "
77+
<< ResultHostData[i] << " != " << ExpectedValue << std::endl;
78+
++Failed;
79+
}
80+
}
81+
82+
syclext::unmap(MappedPtr, AlignedByteSize, Context);
83+
syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Context);
84+
85+
return Failed;
86+
}

sycl/test-e2e/VirtualMem/remapping_virtual_memory_range.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
// RUN: %{build} -o %t.out
55
// RUN: %{run} %t.out
66

7-
#include <sycl/detail/core.hpp>
8-
97
#include <cassert>
108

119
#include "helpers.hpp"

0 commit comments

Comments
 (0)