Skip to content

Commit e45aad4

Browse files
committed
Emit Native CPU properties indipendently
1 parent 3edd618 commit e45aad4

File tree

11 files changed

+210
-25
lines changed

11 files changed

+210
-25
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "llvm/Passes/PassPlugin.h"
4848
#include "llvm/Passes/StandardInstrumentations.h"
4949
#include "llvm/ProfileData/InstrProfCorrelator.h"
50+
#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
5051
#include "llvm/SYCLLowerIR/CleanupSYCLMetadata.h"
5152
#include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h"
5253
#include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h"
@@ -1165,6 +1166,11 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
11651166
if (LangOpts.EnableDAEInSpirKernels)
11661167
MPM.addPass(DeadArgumentEliminationSYCLPass());
11671168

1169+
// We have to schedule the pass here because the native cpu pipeline
1170+
// is ran as part of a separate clang invocation, but we want the information
1171+
// in sycl-post-link.
1172+
if (LangOpts.SYCLIsNativeCPU)
1173+
MPM.addPass(CheckNDRangeSYCLNativeCPUPass());
11681174
// Rerun aspect propagation without warning diagnostics.
11691175
MPM.addPass(
11701176
SYCLPropagateAspectsUsagePass(/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu,

clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131
#include "llvm/IR/LLVMContext.h"
3232
#include "llvm/IR/Module.h"
3333
#include "llvm/IR/PassManager.h"
34+
#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h"
3435
#include "llvm/TargetParser/Triple.h"
36+
#include <optional>
3537
#ifndef NDEBUG
3638
#include "llvm/IR/Verifier.h"
3739
#endif // NDEBUG
@@ -366,6 +368,8 @@ class BinaryWrapper {
366368
/// Records all created memory buffers for safe auto-gc
367369
llvm::SmallVector<std::unique_ptr<MemoryBuffer>, 4> AutoGcBufs;
368370

371+
std::optional<util::PropertySet> SYCLNativeCPUPropSet = std::nullopt;
372+
369373
public:
370374
void addImage(const OffloadKind Kind, llvm::StringRef File,
371375
llvm::StringRef Manif, llvm::StringRef Tgt,
@@ -649,16 +653,9 @@ class BinaryWrapper {
649653
}
650654

651655
Function *addDeclarationForNativeCPU(StringRef Name) {
652-
static FunctionType *NativeCPUFuncTy = FunctionType::get(
656+
static FunctionType *FTy = FunctionType::get(
653657
Type::getVoidTy(C),
654658
{PointerType::getUnqual(C), PointerType::getUnqual(C)}, false);
655-
static FunctionType *NativeCPUBuiltinTy = FunctionType::get(
656-
PointerType::getUnqual(C), {PointerType::getUnqual(C)}, false);
657-
FunctionType *FTy;
658-
if (Name.starts_with("__dpcpp_nativecpu"))
659-
FTy = NativeCPUBuiltinTy;
660-
else
661-
FTy = NativeCPUFuncTy;
662659
auto FCalle = M.getOrInsertFunction(
663660
sycl::utils::addSYCLNativeCPUSuffix(Name).str(), FTy);
664661
Function *F = dyn_cast<Function>(FCalle.getCallee());
@@ -668,16 +665,27 @@ class BinaryWrapper {
668665
}
669666

670667
Expected<std::pair<Constant *, Constant *>>
671-
addDeclarationsForNativeCPU(StringRef EntriesFile) {
668+
addDeclarationsForNativeCPU(StringRef EntriesFile, std::optional<util::PropertySet> NativeCPUProps) {
672669
Expected<MemoryBuffer *> MBOrErr = loadFile(EntriesFile);
673670
if (!MBOrErr)
674671
return MBOrErr.takeError();
675672
MemoryBuffer *MB = *MBOrErr;
676-
// the Native CPU PI Plug-in expects the BinaryStart field to point to an
677-
// array of struct nativecpu_entry {
673+
// the Native CPU UR adapter expects the BinaryStart field to point to
674+
//
675+
// struct nativecpu_program {
676+
// nativecpu_entry *entries;
677+
// ur_program_properties_t *properties;
678+
// };
679+
//
680+
// where "entries" is an array of:
681+
//
682+
// struct nativecpu_entry {
678683
// char *kernelname;
679684
// unsigned char *kernel_ptr;
680685
// };
686+
StructType *NCPUProgramT = StructType::create(
687+
{PointerType::getUnqual(C), PointerType::getUnqual(C)},
688+
"nativecpu_program");
681689
StructType *NCPUEntryT = StructType::create(
682690
{PointerType::getUnqual(C), PointerType::getUnqual(C)},
683691
"__nativecpu_entry");
@@ -703,12 +711,30 @@ class BinaryWrapper {
703711
auto *GVar = new GlobalVariable(M, CA->getType(), true,
704712
GlobalVariable::InternalLinkage, CA,
705713
"__sycl_native_cpu_decls");
706-
auto *Begin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar,
714+
auto *EntriesBegin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar,
715+
getSizetConstPair(0u, 0u));
716+
Constant *PropValue = NullPtr;
717+
if (NativeCPUProps.has_value()) {
718+
auto PropsOrErr = addSYCLPropertySetToModule(*NativeCPUProps);
719+
if (!PropsOrErr)
720+
return PropsOrErr.takeError();
721+
auto *Category = addStringToModule(sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS, "SYCL_PropSetName");
722+
auto S = ConstantStruct::get(
723+
getSyclPropSetTy(), Category, PropsOrErr.get().first, PropsOrErr.get().second);
724+
auto T = addStructArrayToModule({S}, getSyclPropSetTy());
725+
PropValue = T.first;
726+
}
727+
auto *Program = ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue});
728+
ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1);
729+
Constant *CPA = ConstantArray::get(ProgramATy, {Program});
730+
auto *ProgramGVar = new GlobalVariable(M, ProgramATy, true,
731+
GlobalVariable::InternalLinkage, CPA,
732+
"__sycl_native_cpu_program");
733+
auto *ProgramBegin = ConstantExpr::getGetElementPtr(ProgramGVar->getValueType(), ProgramGVar,
707734
getSizetConstPair(0u, 0u));
708-
auto *End = ConstantExpr::getGetElementPtr(
709-
GVar->getValueType(), GVar,
710-
getSizetConstPair(0u, NativeCPUEntries.size()));
711-
return std::make_pair(Begin, End);
735+
auto *ProgramEnd = ConstantExpr::getGetElementPtr(ProgramGVar->getValueType(), ProgramGVar,
736+
getSizetConstPair(0u, 1u));
737+
return std::make_pair(ProgramBegin, ProgramEnd);
712738
}
713739

714740
// Adds a global readonly variable that is initialized by given data to the
@@ -941,6 +967,12 @@ class BinaryWrapper {
941967
// the PropSetsInits
942968
for (const auto &PropSet : *PropRegistry) {
943969
// create content in the rightmost column and get begin/end pointers
970+
if (PropSet.first == sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS) {
971+
// We don't emit Native CPU specific properties in this section, but instead
972+
// we emit them in the native_cpu_entry struct directly.
973+
SYCLNativeCPUPropSet = PropSet.second;
974+
continue;
975+
}
944976
Expected<std::pair<Constant *, Constant *>> Props =
945977
addSYCLPropertySetToModule(PropSet.second);
946978
if (!Props)
@@ -1103,7 +1135,7 @@ class BinaryWrapper {
11031135
}
11041136
std::pair<Constant *, Constant *> Fbin;
11051137
if (Img.Tgt == "native_cpu") {
1106-
auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile);
1138+
auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile, SYCLNativeCPUPropSet);
11071139
if (!FBinOrErr)
11081140
return FBinOrErr.takeError();
11091141
Fbin = *FBinOrErr;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===-- CheckNDRangeSYCLNativeCPU.h -Check if a kernel uses nd_range features--===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A transformation pass that:
10+
// * Handles the kernel calling convention and attributes.
11+
// * Materializes the spirv builtins so that they can be handled by the host
12+
// runtime.
13+
//===----------------------------------------------------------------------===//
14+
15+
#pragma once
16+
17+
#include "llvm/IR/Module.h"
18+
#include "llvm/IR/PassManager.h"
19+
20+
namespace llvm {
21+
22+
class ModulePass;
23+
24+
class CheckNDRangeSYCLNativeCPUPass
25+
: public PassInfoMixin<CheckNDRangeSYCLNativeCPUPass> {
26+
public:
27+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
28+
};
29+
30+
} // namespace llvm

llvm/include/llvm/Support/PropertySetIO.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ class PropertySetRegistry {
210210
static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements";
211211
static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes";
212212
static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions";
213+
static constexpr char SYCL_NATIVE_CPU_PROPS[] = "SYCL/native cpu properties";
213214

214215
/// Function for bulk addition of an entire property set in the given
215216
/// \p Category .

llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,11 @@ PropSetRegTy computeModuleProperties(const Module &M,
307307
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
308308
*MaxLinearWGSize);
309309
}
310+
311+
if (auto IsNDRange = getKernelSingleEltMetadata<bool>(Func, "is_nd_range")) {
312+
MetadataNames.push_back(Func.getName().str() + "@is_nd_range");
313+
PropSet.add(PropSetRegTy::SYCL_NATIVE_CPU_PROPS, MetadataNames.back(), *IsNDRange);
314+
}
310315
}
311316

312317
// Add global_id_mapping information with mapping between device-global

llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
55
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
66
FixABIMuxBuiltinsSYCLNativeCPU.cpp
77
FAtomicsNativeCPU.cpp
8+
CheckNDRangeSYCLNativeCPU.cpp
89

910
ADDITIONAL_HEADER_DIRS
1011
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//===------ PrepareSYCLNativeCPU.cpp - SYCL Native CPU Preparation Pass ---===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Checks if the kernel uses features from nd_item such as:
10+
// * local id
11+
// * local range
12+
// * local memory
13+
// * work group barrier
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
17+
#include "llvm/IR/CallingConv.h"
18+
#include "llvm/IR/Constants.h"
19+
#include "llvm/IR/DerivedTypes.h"
20+
#include "llvm/IR/InstrTypes.h"
21+
#include "llvm/IR/Instructions.h"
22+
#include "llvm/IR/Metadata.h"
23+
24+
using namespace llvm;
25+
26+
// TODO: add other bts
27+
static std::array<const char *, 5> ndFunctions{
28+
"_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_NumWorkgroups_xv",
29+
"_Z21__spirv_WorkgroupId_xv", "_Z27__spirv_LocalInvocationId_xv",
30+
"_Z22__spirv_ControlBarrierjjj"};
31+
32+
static void addNDRangeMetadata(Function &F, bool Value) {
33+
auto &Ctx = F.getContext();
34+
F.setMetadata("is_nd_range",
35+
MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get(
36+
Type::getInt1Ty(Ctx), Value))));
37+
}
38+
39+
PreservedAnalyses
40+
CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) {
41+
bool ModuleChanged = false;
42+
43+
for (auto &F : M) {
44+
if (F.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) {
45+
bool IsNDRange = false;
46+
47+
// Check for local memory args
48+
for (auto &A : F.args()) {
49+
if (auto Ptr = dyn_cast<PointerType>(A.getType());
50+
Ptr && Ptr->getAddressSpace() == 3) {
51+
IsNDRange = true;
52+
}
53+
}
54+
55+
for (auto &BB : F) {
56+
for (auto &I : BB) {
57+
if (auto CI = dyn_cast<CallInst>(&I)) {
58+
auto CalleeName = CI->getCalledFunction()->getName();
59+
if (std::find(ndFunctions.begin(), ndFunctions.end(), CalleeName) !=
60+
ndFunctions.end()) {
61+
IsNDRange = true;
62+
break;
63+
}
64+
}
65+
}
66+
if (IsNDRange) {
67+
break;
68+
}
69+
}
70+
71+
addNDRangeMetadata(F, IsNDRange);
72+
}
73+
}
74+
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
75+
}

llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
// When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit.
1212
//
1313
//===----------------------------------------------------------------------===//
14+
#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
1415
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
1516
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
1617
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
116116
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
117117
endfunction()
118118

119-
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
119+
set(UNIFIED_RUNTIME_REPO "https://github.com/PietroGhg/unified-runtime.git")
120120
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)
121121

122122
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1 @@
1-
# commit 9937d029c7fdcbf101e89f8515f640c145e059c5
2-
# Merge: 9ac6d5d9 10b0e101
3-
# Author: Callum Fare <[email protected]>
4-
# Date: Wed Nov 20 14:49:17 2024 +0000
5-
# Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo
6-
# Use extension version of clGetKernelSubGroupInfo when necessary.
7-
set(UNIFIED_RUNTIME_TAG 9937d029c7fdcbf101e89f8515f640c145e059c5)
1+
set(UNIFIED_RUNTIME_TAG pietro/prop_ncpu_r_or_nd)

0 commit comments

Comments
 (0)