Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (LangOpts.EnableDAEInSpirKernels)
MPM.addPass(DeadArgumentEliminationSYCLPass());

if (LangOpts.SYCLIsNativeCPU)
llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(MPM);
// Rerun aspect propagation without warning diagnostics.
MPM.addPass(
SYCLPropagateAspectsUsagePass(/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu,
Expand Down
73 changes: 56 additions & 17 deletions clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h"
#include "llvm/TargetParser/Triple.h"
#include <optional>
#ifndef NDEBUG
#include "llvm/IR/Verifier.h"
#endif // NDEBUG
Expand Down Expand Up @@ -366,6 +368,8 @@ class BinaryWrapper {
/// Records all created memory buffers for safe auto-gc
llvm::SmallVector<std::unique_ptr<MemoryBuffer>, 4> AutoGcBufs;

std::optional<util::PropertySet> SYCLNativeCPUPropSet = std::nullopt;

public:
void addImage(const OffloadKind Kind, llvm::StringRef File,
llvm::StringRef Manif, llvm::StringRef Tgt,
Expand Down Expand Up @@ -649,16 +653,9 @@ class BinaryWrapper {
}

Function *addDeclarationForNativeCPU(StringRef Name) {
static FunctionType *NativeCPUFuncTy = FunctionType::get(
static FunctionType *FTy = FunctionType::get(
Type::getVoidTy(C),
{PointerType::getUnqual(C), PointerType::getUnqual(C)}, false);
static FunctionType *NativeCPUBuiltinTy = FunctionType::get(
PointerType::getUnqual(C), {PointerType::getUnqual(C)}, false);
FunctionType *FTy;
if (Name.starts_with("__dpcpp_nativecpu"))
FTy = NativeCPUBuiltinTy;
else
FTy = NativeCPUFuncTy;
auto FCalle = M.getOrInsertFunction(
sycl::utils::addSYCLNativeCPUSuffix(Name).str(), FTy);
Function *F = dyn_cast<Function>(FCalle.getCallee());
Expand All @@ -668,16 +665,27 @@ class BinaryWrapper {
}

Expected<std::pair<Constant *, Constant *>>
addDeclarationsForNativeCPU(StringRef EntriesFile) {
addDeclarationsForNativeCPU(StringRef EntriesFile, std::optional<util::PropertySet> NativeCPUProps) {
Expected<MemoryBuffer *> MBOrErr = loadFile(EntriesFile);
if (!MBOrErr)
return MBOrErr.takeError();
MemoryBuffer *MB = *MBOrErr;
// the Native CPU PI Plug-in expects the BinaryStart field to point to an
// array of struct nativecpu_entry {
// the Native CPU UR adapter expects the BinaryStart field to point to
//
// struct nativecpu_program {
// nativecpu_entry *entries;
// ur_program_properties_t *properties;
// };
//
// where "entries" is an array of:
//
// struct nativecpu_entry {
// char *kernelname;
// unsigned char *kernel_ptr;
// };
StructType *NCPUProgramT = StructType::create(
{PointerType::getUnqual(C), PointerType::getUnqual(C)},
"nativecpu_program");
StructType *NCPUEntryT = StructType::create(
{PointerType::getUnqual(C), PointerType::getUnqual(C)},
"__nativecpu_entry");
Expand All @@ -703,12 +711,37 @@ class BinaryWrapper {
auto *GVar = new GlobalVariable(M, CA->getType(), true,
GlobalVariable::InternalLinkage, CA,
"__sycl_native_cpu_decls");
auto *Begin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar,
auto *EntriesBegin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar,
getSizetConstPair(0u, 0u));
auto *End = ConstantExpr::getGetElementPtr(
GVar->getValueType(), GVar,
getSizetConstPair(0u, NativeCPUEntries.size()));
return std::make_pair(Begin, End);

// Add Native CPU specific properties to the nativecpu_program struct
Constant *PropValue = NullPtr;
if (NativeCPUProps.has_value()) {
auto PropsOrErr = addSYCLPropertySetToModule(*NativeCPUProps);
if (!PropsOrErr)
return PropsOrErr.takeError();
auto *Category = addStringToModule(sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS, "SYCL_PropSetName");
auto S = ConstantStruct::get(
getSyclPropSetTy(), Category, PropsOrErr.get().first, PropsOrErr.get().second);
auto T = addStructArrayToModule({S}, getSyclPropSetTy());
PropValue = T.first;
}

// Create the nativecpu_program struct.
// We add it to a ConstantArray of length 1 because the SYCL runtime expects
// a non-zero sized binary image, and this allows it to point the end of the
// binary image to the end of the array.
auto *Program = ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue});
ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1);
Constant *CPA = ConstantArray::get(ProgramATy, {Program});
auto *ProgramGVar = new GlobalVariable(M, ProgramATy, true,
GlobalVariable::InternalLinkage, CPA,
"__sycl_native_cpu_program");
auto *ProgramBegin = ConstantExpr::getGetElementPtr(ProgramGVar->getValueType(), ProgramGVar,
getSizetConstPair(0u, 0u));
auto *ProgramEnd = ConstantExpr::getGetElementPtr(ProgramGVar->getValueType(), ProgramGVar,
getSizetConstPair(0u, 1u));
return std::make_pair(ProgramBegin, ProgramEnd);
}

// Adds a global readonly variable that is initialized by given data to the
Expand Down Expand Up @@ -941,6 +974,12 @@ class BinaryWrapper {
// the PropSetsInits
for (const auto &PropSet : *PropRegistry) {
// create content in the rightmost column and get begin/end pointers
if (PropSet.first == sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS) {
// We don't emit Native CPU specific properties in this section, but instead
// we emit them in the native_cpu_entry struct directly.
SYCLNativeCPUPropSet = PropSet.second;
continue;
}
Expected<std::pair<Constant *, Constant *>> Props =
addSYCLPropertySetToModule(PropSet.second);
if (!Props)
Expand Down Expand Up @@ -1103,7 +1142,7 @@ class BinaryWrapper {
}
std::pair<Constant *, Constant *> Fbin;
if (Img.Tgt == "native_cpu") {
auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile);
auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile, SYCLNativeCPUPropSet);
if (!FBinOrErr)
return FBinOrErr.takeError();
Fbin = *FBinOrErr;
Expand Down
31 changes: 31 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//===-- CheckNDRangeSYCLNativeCPU.h -Check if a kernel uses nd_range features--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Checks if the kernel uses features from nd_item such as:
// * local id
// * local range
// * local memory
// * work group barrier
//===----------------------------------------------------------------------===//

#pragma once

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class ModulePass;

class CheckNDRangeSYCLNativeCPUPass
: public PassInfoMixin<CheckNDRangeSYCLNativeCPUPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

} // namespace llvm
7 changes: 7 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
//===----------------------------------------------------------------------===//
#pragma once
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/OptimizationLevel.h"

namespace llvm {
namespace sycl {
namespace utils {

// Used to schedule passes in the device compiler cc1 invocation for
// Native CPU.
void addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM);

void addSYCLNativeCPUBackendPasses(ModulePassManager &MPM,
ModuleAnalysisManager &MAM,
OptimizationLevel OptLevel);
Expand All @@ -35,6 +40,8 @@ inline bool isSYCLNativeCPU(const Module &M) {
return M.getModuleFlag("is-native-cpu") != nullptr;
}

constexpr unsigned SyclNativeCpuLocalAS = 3;

} // namespace utils
} // namespace sycl
} // namespace llvm
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class PropertySetRegistry {
static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements";
static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes";
static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions";
static constexpr char SYCL_NATIVE_CPU_PROPS[] = "SYCL/native cpu properties";

/// Function for bulk addition of an entire property set in the given
/// \p Category .
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,11 @@ PropSetRegTy computeModuleProperties(const Module &M,
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
*MaxLinearWGSize);
}

if (auto IsNDRange = getKernelSingleEltMetadata<bool>(Func, "is_nd_range")) {
MetadataNames.push_back(Func.getName().str() + "@is_nd_range");
PropSet.add(PropSetRegTy::SYCL_NATIVE_CPU_PROPS, MetadataNames.back(), *IsNDRange);
}
}

// Add global_id_mapping information with mapping between device-global
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
FixABIMuxBuiltinsSYCLNativeCPU.cpp
FAtomicsNativeCPU.cpp
CheckNDRangeSYCLNativeCPU.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Expand Down
121 changes: 121 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//- CheckNDRangeSYCLNativeCPU.cpp - Check if a kernel uses nd_range features -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Checks if the kernel uses features from nd_item such as:
// * local id
// * local range
// * local memory
// * work group barrier
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

static std::array<const char *, 13> NdBuiltins{
"_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_WorkgroupSize_yv",
"_Z23__spirv_WorkgroupSize_zv", "_Z23__spirv_NumWorkgroups_xv",
"_Z23__spirv_NumWorkgroups_yv", "_Z23__spirv_NumWorkgroups_zv",
"_Z21__spirv_WorkgroupId_xv", "_Z21__spirv_WorkgroupId_yv",
"_Z21__spirv_WorkgroupId_zv", "_Z27__spirv_LocalInvocationId_xv",
"_Z27__spirv_LocalInvocationId_yv", "_Z27__spirv_LocalInvocationId_zv",
"_Z22__spirv_ControlBarrierjjj"};

static void addNDRangeMetadata(Function &F, bool Value) {
auto &Ctx = F.getContext();
F.setMetadata("is_nd_range",
MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get(
Type::getInt1Ty(Ctx), Value))));
}

PreservedAnalyses
CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool ModuleChanged = false;
SmallPtrSet<Function *, 5> NdFuncs; // Functions that use NDRange features
SmallPtrSet<Function *, 5> Visited;
SmallPriorityWorklist<Function *, 5> WorkList;

// Add builtins to the set of functions that may use NDRange features
for (auto &FName : NdBuiltins) {
auto F = M.getFunction(FName);
if (F == nullptr)
continue;
WorkList.insert(F);
NdFuncs.insert(F);
}

// Add users of local AS global var to the set of functions that may use
// NDRange features
for (auto &GV : M.globals()) {
if (GV.getAddressSpace() != sycl::utils::SyclNativeCpuLocalAS)
continue;

for (auto U : GV.users()) {
if (auto I = dyn_cast<Instruction>(U)) {
auto F = I->getFunction();
if (F != nullptr && NdFuncs.insert(F).second) {
WorkList.insert(F);
NdFuncs.insert(F);
}
}
}
}

// Traverse the use chain to find Functions that may use NDRange features
// (or, recursively, Functions that call Functions that may use NDRange
// features)
while (!WorkList.empty()) {
auto F = WorkList.pop_back_val();

for (User *U : F->users()) {
if (auto CI = dyn_cast<CallInst>(U)) {
auto Caller = CI->getFunction();
if (!Caller)
continue;
if (!Visited.contains(Caller)) {
WorkList.insert(Caller);
NdFuncs.insert(Caller);
}
}
}
Visited.insert(F);
}

for (auto &F : M) {
if (F.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) {
bool IsNDRange = false;

// Check for local memory args
for (auto &A : F.args()) {
if (auto Ptr = dyn_cast<PointerType>(A.getType());
Ptr && Ptr->getAddressSpace() == 3) {
IsNDRange = true;
}
}

// Check if the kernel calls one of the ND Range builtins
IsNDRange |= NdFuncs.contains(&F);

addNDRangeMetadata(F, IsNDRange);
ModuleChanged = true;
}
}
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
5 changes: 5 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit.
//
//===----------------------------------------------------------------------===//
#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
Expand Down Expand Up @@ -130,3 +131,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
MPM.addPass(DumpIR());
}
}

void llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM) {
MPM.addPass(CheckNDRangeSYCLNativeCPUPass());
}
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/PietroGhg/unified-runtime.git")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
8 changes: 1 addition & 7 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@
# commit 9937d029c7fdcbf101e89f8515f640c145e059c5
# Merge: 9ac6d5d9 10b0e101
# Author: Callum Fare <[email protected]>
# Date: Wed Nov 20 14:49:17 2024 +0000
# Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo
# Use extension version of clGetKernelSubGroupInfo when necessary.
set(UNIFIED_RUNTIME_TAG 9937d029c7fdcbf101e89f8515f640c145e059c5)
set(UNIFIED_RUNTIME_TAG pietro/prop_ncpu_r_or_nd)
Loading
Loading