Skip to content

Commit c063ed8

Browse files
author
Alberto Cabrera Pérez
authored
Merge branch 'sycl' into counter-based-4
2 parents 2ca29f1 + 79b620b commit c063ed8

File tree

20 files changed

+597
-116
lines changed

20 files changed

+597
-116
lines changed

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1513,7 +1513,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
15131513
const llvm::Triple::SubArchType DeviceSubArch = DeviceTriple.getSubArch();
15141514
if (DeviceTriple.isNVPTX() || DeviceTriple.isAMDGPU() ||
15151515
(DeviceTriple.isSPIR() &&
1516-
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga))
1516+
DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) ||
1517+
LangOpts.SYCLIsNativeCPU)
15171518
Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS");
15181519
// Enable generation of USM address spaces for FPGA.
15191520
if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) {

clang/test/Preprocessor/sycl-macro-target-specific.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
4343
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amdhsa-amdhsa -E -dM \
4444
// RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
45+
// RUN: %clang_cc1 %s -fsycl-is-device -triple x86_64-unknown-linux-gnu -fsycl-is-native-cpu \
46+
// RUN: -E -dM | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS %s
4547
// CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS
4648
// CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS
4749

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===------- FAtomicsNativeCPU.h - Materializes FP Atomics ----------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that materializes floating points atomics by emitting
10+
// corresponding atomicrmw instruction.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#pragma once
15+
16+
#include "llvm/IR/Module.h"
17+
#include "llvm/IR/PassManager.h"
18+
19+
namespace llvm {
20+
21+
class ModulePass;
22+
23+
class FAtomicsNativeCPU : public PassInfoMixin<FAtomicsNativeCPU> {
24+
public:
25+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
26+
};
27+
28+
} // namespace llvm

llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
44
RenameKernelSYCLNativeCPU.cpp
55
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
66
FixABIMuxBuiltinsSYCLNativeCPU.cpp
7+
FAtomicsNativeCPU.cpp
78

89
ADDITIONAL_HEADER_DIRS
910
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===------- FAtomicsNativeCPU.cpp - Materializes FP Atomics --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that materializes floating points atomics by emitting
10+
// corresponding atomicrmw instruction.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
15+
#include "llvm/IR/IRBuilder.h"
16+
#include "llvm/IR/Instructions.h"
17+
#include "llvm/IR/LLVMContext.h"
18+
#include "llvm/Support/Alignment.h"
19+
#include "llvm/Support/AtomicOrdering.h"
20+
21+
using namespace llvm;
22+
23+
PreservedAnalyses FAtomicsNativeCPU::run(Module &M,
24+
ModuleAnalysisManager &MAM) {
25+
bool ModuleChanged = false;
26+
auto &Ctx = M.getContext();
27+
// TODO: add checks for windows mangling
28+
for (auto &F : M) {
29+
AtomicRMWInst::BinOp OpCode;
30+
if (F.getName().starts_with("_Z21__spirv_AtomicFAddEXT")) {
31+
OpCode = AtomicRMWInst::BinOp::FAdd;
32+
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMinEXT")) {
33+
OpCode = AtomicRMWInst::BinOp::FMin;
34+
} else if (F.getName().starts_with("_Z21__spirv_AtomicFMaxEXT")) {
35+
OpCode = AtomicRMWInst::BinOp::FMax;
36+
} else {
37+
continue;
38+
}
39+
40+
BasicBlock *BB = BasicBlock::Create(Ctx, "entry", &F);
41+
IRBuilder<> Builder(BB);
42+
// Currently we drop arguments 1 and 2 (scope and memory ordering),
43+
// defaulting to Monotonic ordering and System scope.
44+
auto A =
45+
Builder.CreateAtomicRMW(OpCode, F.getArg(0), F.getArg(3), MaybeAlign(),
46+
AtomicOrdering::Monotonic, SyncScope::System);
47+
Builder.CreateRet(A);
48+
}
49+
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
50+
}

llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//
1313
//===----------------------------------------------------------------------===//
1414
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
15+
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
1516
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
1617
#include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h"
1718
#include "llvm/SYCLLowerIR/RenameKernelSYCLNativeCPU.h"
@@ -70,6 +71,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
7071
OptimizationLevel OptLevel) {
7172
MPM.addPass(SpecConstantsPass(SpecConstantsPass::HandlingMode::emulation));
7273
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
74+
MPM.addPass(FAtomicsNativeCPU());
7375
#ifdef NATIVECPU_USE_OCK
7476
MPM.addPass(compiler::utils::PrepareBarriersPass());
7577
MPM.addPass(compiler::utils::TransferKernelMetadataPass());

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,8 +1450,14 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M,
14501450
const StackSafetyGlobalInfo *const SSGI =
14511451
ClUseStackSafety ? &MAM.getResult<StackSafetyGlobalAnalysis>(M) : nullptr;
14521452

1453-
if (Triple(M.getTargetTriple()).isSPIROrSPIRV())
1453+
if (Triple(M.getTargetTriple()).isSPIROrSPIRV()) {
14541454
ExtendSpirKernelArgs(M, FAM);
1455+
// FIXME: W/A skip instrumentation if this module has ESIMD
1456+
for (auto &F : M) {
1457+
if (F.hasMetadata("sycl_explicit_simd"))
1458+
return PreservedAnalyses::all();
1459+
}
1460+
}
14551461

14561462
for (Function &F : M) {
14571463
AddressSanitizer FunctionSanitizer(
@@ -3497,10 +3503,6 @@ bool AddressSanitizer::instrumentFunction(Function &F,
34973503
// function isn't supported yet in intel-graphics-compiler.
34983504
if (F.hasFnAttribute("referenced-indirectly"))
34993505
return false;
3500-
// FIXME: ESIMD kernel doesn't support noinline functions, so we can't
3501-
// support sanitizer for it
3502-
if (F.hasMetadata("sycl_explicit_simd"))
3503-
return false;
35043506
}
35053507

35063508
bool FunctionModified = false;

llvm/test/Instrumentation/AddressSanitizer/SPIRV/sycl_esimd.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
44
target triple = "spir64-unknown-unknown"
55

6+
;;
7+
;; W/A: We skip asan completely if one module has esimd
8+
;;
9+
610
define spir_kernel void @sycl_kernel(ptr addrspace(1) %p) #0 {
711
; CHECK-LABEL: define spir_kernel void @sycl_kernel(ptr addrspace(1) %p, ptr addrspace(1) %__asan_launch) #0
812
entry:
913
%0 = load i32, ptr addrspace(1) %p, align 4
10-
; CHECK: store ptr addrspace(1) %__asan_launch, ptr addrspace(3) @__AsanLaunchInfo, align 8
11-
; CHECK: call void @__asan_load4
14+
; CHECK-NOT: store ptr addrspace(1) %__asan_launch, ptr addrspace(3) @__AsanLaunchInfo, align 8
15+
; CHECK-NOT: call void @__asan_load4
1216
ret void
1317
}
1418

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 120 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <detail/kernel_compiler/kernel_compiler_opencl.hpp>
1313
#include <detail/kernel_compiler/kernel_compiler_sycl.hpp>
1414
#include <detail/kernel_impl.hpp>
15+
#include <detail/persistent_device_code_cache.hpp>
1516
#include <detail/program_manager/program_manager.hpp>
1617
#include <sycl/backend_types.hpp>
1718
#include <sycl/context.hpp>
@@ -396,6 +397,53 @@ class kernel_bundle_impl {
396397
return SS.str();
397398
}
398399

400+
bool
401+
extKernelCompilerFetchFromCache(const std::vector<device> Devices,
402+
const std::vector<std::string> &BuildOptions,
403+
const std::string &SourceStr,
404+
ur_program_handle_t &UrProgram) {
405+
using ContextImplPtr = std::shared_ptr<sycl::detail::context_impl>;
406+
ContextImplPtr ContextImpl = getSyclObjImpl(MContext);
407+
const AdapterPtr &Adapter = ContextImpl->getAdapter();
408+
409+
std::string UserArgs = syclex::detail::userArgsAsString(BuildOptions);
410+
411+
std::vector<ur_device_handle_t> DeviceHandles;
412+
std::transform(
413+
Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles),
414+
[](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); });
415+
416+
std::vector<const uint8_t *> Binaries;
417+
std::vector<size_t> Lengths;
418+
std::vector<std::vector<std::vector<char>>> PersistentBinaries;
419+
for (size_t i = 0; i < Devices.size(); i++) {
420+
std::vector<std::vector<char>> BinProg =
421+
PersistentDeviceCodeCache::getCompiledKernelFromDisc(
422+
Devices[i], UserArgs, SourceStr);
423+
424+
// exit if any device binary is missing
425+
if (BinProg.empty()) {
426+
return false;
427+
}
428+
PersistentBinaries.push_back(BinProg);
429+
430+
Binaries.push_back((uint8_t *)(PersistentBinaries[i][0].data()));
431+
Lengths.push_back(PersistentBinaries[i][0].size());
432+
}
433+
434+
ur_program_properties_t Properties = {};
435+
Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES;
436+
Properties.pNext = nullptr;
437+
Properties.count = 0;
438+
Properties.pMetadatas = nullptr;
439+
440+
Adapter->call<UrApiKind::urProgramCreateWithBinary>(
441+
ContextImpl->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(),
442+
Lengths.data(), Binaries.data(), &Properties, &UrProgram);
443+
444+
return true;
445+
}
446+
399447
std::shared_ptr<kernel_bundle_impl>
400448
build_from_source(const std::vector<device> Devices,
401449
const std::vector<std::string> &BuildOptions,
@@ -415,57 +463,68 @@ class kernel_bundle_impl {
415463
DeviceVec.push_back(Dev);
416464
}
417465

418-
const auto spirv = [&]() -> std::vector<uint8_t> {
419-
if (Language == syclex::source_language::opencl) {
420-
// if successful, the log is empty. if failed, throws an error with the
421-
// compilation log.
422-
const auto &SourceStr = std::get<std::string>(this->Source);
423-
std::vector<uint32_t> IPVersionVec(Devices.size());
424-
std::transform(DeviceVec.begin(), DeviceVec.end(), IPVersionVec.begin(),
425-
[&](ur_device_handle_t d) {
426-
uint32_t ipVersion = 0;
427-
Adapter->call<UrApiKind::urDeviceGetInfo>(
428-
d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t),
429-
&ipVersion, nullptr);
430-
return ipVersion;
431-
});
432-
return syclex::detail::OpenCLC_to_SPIRV(SourceStr, IPVersionVec,
433-
BuildOptions, LogPtr);
434-
}
435-
if (Language == syclex::source_language::spirv) {
436-
const auto &SourceBytes =
437-
std::get<std::vector<std::byte>>(this->Source);
438-
std::vector<uint8_t> Result(SourceBytes.size());
439-
std::transform(SourceBytes.cbegin(), SourceBytes.cend(), Result.begin(),
440-
[](std::byte B) { return static_cast<uint8_t>(B); });
441-
return Result;
442-
}
443-
if (Language == syclex::source_language::sycl) {
444-
const auto &SourceStr = std::get<std::string>(this->Source);
445-
return syclex::detail::SYCL_to_SPIRV(SourceStr, IncludePairs,
446-
BuildOptions, LogPtr,
447-
RegisteredKernelNames);
448-
}
449-
if (Language == syclex::source_language::sycl_jit) {
450-
const auto &SourceStr = std::get<std::string>(this->Source);
451-
return syclex::detail::SYCL_JIT_to_SPIRV(SourceStr, IncludePairs,
452-
BuildOptions, LogPtr,
453-
RegisteredKernelNames);
454-
}
455-
throw sycl::exception(
456-
make_error_code(errc::invalid),
457-
"OpenCL C and SPIR-V are the only supported languages at this time");
458-
}();
459-
460466
ur_program_handle_t UrProgram = nullptr;
461-
Adapter->call<UrApiKind::urProgramCreateWithIL>(ContextImpl->getHandleRef(),
462-
spirv.data(), spirv.size(),
463-
nullptr, &UrProgram);
464-
// program created by urProgramCreateWithIL is implicitly retained.
465-
if (UrProgram == nullptr)
466-
throw sycl::exception(
467-
sycl::make_error_code(errc::invalid),
468-
"urProgramCreateWithIL resulted in a null program handle.");
467+
// SourceStrPtr will be null when source is Spir-V bytes.
468+
const std::string *SourceStrPtr = std::get_if<std::string>(&this->Source);
469+
bool FetchedFromCache = false;
470+
if (PersistentDeviceCodeCache::isEnabled() && SourceStrPtr) {
471+
FetchedFromCache = extKernelCompilerFetchFromCache(
472+
Devices, BuildOptions, *SourceStrPtr, UrProgram);
473+
}
474+
475+
if (!FetchedFromCache) {
476+
const auto spirv = [&]() -> std::vector<uint8_t> {
477+
if (Language == syclex::source_language::opencl) {
478+
// if successful, the log is empty. if failed, throws an error with
479+
// the compilation log.
480+
std::vector<uint32_t> IPVersionVec(Devices.size());
481+
std::transform(DeviceVec.begin(), DeviceVec.end(),
482+
IPVersionVec.begin(), [&](ur_device_handle_t d) {
483+
uint32_t ipVersion = 0;
484+
Adapter->call<UrApiKind::urDeviceGetInfo>(
485+
d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t),
486+
&ipVersion, nullptr);
487+
return ipVersion;
488+
});
489+
return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec,
490+
BuildOptions, LogPtr);
491+
}
492+
if (Language == syclex::source_language::spirv) {
493+
const auto &SourceBytes =
494+
std::get<std::vector<std::byte>>(this->Source);
495+
std::vector<uint8_t> Result(SourceBytes.size());
496+
std::transform(SourceBytes.cbegin(), SourceBytes.cend(),
497+
Result.begin(),
498+
[](std::byte B) { return static_cast<uint8_t>(B); });
499+
return Result;
500+
}
501+
if (Language == syclex::source_language::sycl) {
502+
return syclex::detail::SYCL_to_SPIRV(*SourceStrPtr, IncludePairs,
503+
BuildOptions, LogPtr,
504+
RegisteredKernelNames);
505+
}
506+
if (Language == syclex::source_language::sycl_jit) {
507+
const auto &SourceStr = std::get<std::string>(this->Source);
508+
return syclex::detail::SYCL_JIT_to_SPIRV(SourceStr, IncludePairs,
509+
BuildOptions, LogPtr,
510+
RegisteredKernelNames);
511+
}
512+
throw sycl::exception(
513+
make_error_code(errc::invalid),
514+
"SYCL C++, OpenCL C and SPIR-V are the only supported "
515+
"languages at this time");
516+
}();
517+
518+
Adapter->call<UrApiKind::urProgramCreateWithIL>(
519+
ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr,
520+
&UrProgram);
521+
// program created by urProgramCreateWithIL is implicitly retained.
522+
if (UrProgram == nullptr)
523+
throw sycl::exception(
524+
sycl::make_error_code(errc::invalid),
525+
"urProgramCreateWithIL resulted in a null program handle.");
526+
527+
} // if(!FetchedFromCache)
469528

470529
std::string XsFlags = extractXsFlags(BuildOptions);
471530
auto Res = Adapter->call_nocheck<UrApiKind::urProgramBuildExp>(
@@ -501,6 +560,17 @@ class kernel_bundle_impl {
501560
nullptr, MContext, MDevices, bundle_state::executable, KernelIDs,
502561
UrProgram);
503562
device_image_plain DevImg{DevImgImpl};
563+
564+
// If caching enabled and kernel not fetched from cache, cache.
565+
if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache &&
566+
SourceStrPtr) {
567+
for (const auto &Device : Devices) {
568+
PersistentDeviceCodeCache::putCompiledKernelToDisc(
569+
Device, syclex::detail::userArgsAsString(BuildOptions),
570+
*SourceStrPtr, UrProgram);
571+
}
572+
}
573+
504574
return std::make_shared<kernel_bundle_impl>(MContext, MDevices, DevImg,
505575
KernelNames, Language);
506576
}

sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ SYCL_to_SPIRV(const std::string &SYCLSource, include_pairs_t IncludePairs,
3636
throw sycl::exception(sycl::errc::build,
3737
"kernel_compiler does not support GCC<8");
3838
}
39+
40+
std::string userArgsAsString(const std::vector<std::string> &UserArguments) {
41+
return std::accumulate(UserArguments.begin(), UserArguments.end(),
42+
std::string(""),
43+
[](const std::string &A, const std::string &B) {
44+
return A.empty() ? B : A + " " + B;
45+
});
46+
}
47+
3948
} // namespace detail
4049
} // namespace ext::oneapi::experimental
4150
} // namespace _V1

0 commit comments

Comments
 (0)