diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ad650704a1615..2d8a07d427dad 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10952,6 +10952,7 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA, const llvm::opt::ArgList &TCArgs, ArgStringList &PostLinkArgs) { + // See if device code splitting is requested if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) { auto CodeSplitValue = StringRef(A->getValue()); @@ -10977,6 +10978,39 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (allowDeviceImageDependencies(TCArgs)) addArgs(PostLinkArgs, TCArgs, {"-allow-device-image-dependencies"}); + + bool DeviceLibDisable = false; + Arg *DeviceLibArg = TCArgs.getLastArg(options::OPT_fsycl_device_lib_EQ, + options::OPT_fno_sycl_device_lib_EQ); + if (DeviceLibArg && + DeviceLibArg->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) { + for (StringRef Val : DeviceLibArg->getValues()) { + if (Val == "all") { + DeviceLibDisable = true; + break; + } + } + } + + // Fallback spv is NOT involved in AOT compilation or + // '-fno-sycl-device-lib=all' is applied by user explicitly. + if (TC.getTriple().isSPIROrSPIRV() && !TC.getTriple().isSPIRAOT() && + !DeviceLibDisable) { + SYCLInstallationDetector SYCLInstall(TC.getDriver()); + SmallVector, 4> DeviceLibLocCandidates; + SmallString<128> FallbackAssertName("libsycl-fallback-cassert.bc"); + SYCLInstall.getSYCLDeviceLibPath(DeviceLibLocCandidates); + for (const auto &DeviceLibLoc : DeviceLibLocCandidates) { + SmallString<128> FullLibName(DeviceLibLoc); + llvm::sys::path::append(FullLibName, FallbackAssertName); + if (llvm::sys::fs::exists(FullLibName)) { + SmallString<128> SYCLDeviceLibDir("--device-lib-dir="); + SYCLDeviceLibDir += DeviceLibLoc.str(); + addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()}); + break; + } + } + } } // On Intel targets we don't need non-kernel functions as entry points, diff --git a/clang/test/Driver/sycl-post-link-options-win.cpp b/clang/test/Driver/sycl-post-link-options-win.cpp index 7c641cea6783d..137ee1a89de4b 100644 --- a/clang/test/Driver/sycl-post-link-options-win.cpp +++ b/clang/test/Driver/sycl-post-link-options-win.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx -### --target=x86_64-pc-windows-msvc -fsycl \ // RUN: -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // ------- // Generate .o file as linker wrapper input. // diff --git a/clang/test/Driver/sycl-post-link-options.cpp b/clang/test/Driver/sycl-post-link-options.cpp index c343f4685aaa1..8f8fe14325d47 100644 --- a/clang/test/Driver/sycl-post-link-options.cpp +++ b/clang/test/Driver/sycl-post-link-options.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl -### \ // RUN: --no-offload-new-driver -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // // Generate .o file as linker wrapper input. // diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index a8ed3442b4ca9..8643001d25fc3 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -726,6 +726,16 @@ runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args) { SmallVector CmdArgs; CmdArgs.push_back(*SYCLPostLinkPath); const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ); + if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) { + std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue(); + std::string AssertDeviceLibLoc = + SYCLDeviceLibLocParam + "/libsycl-fallback-cassert.bc"; + if (llvm::sys::fs::exists(AssertDeviceLibLoc)) { + SYCLDeviceLibLocParam = "--device-lib-dir=" + SYCLDeviceLibLocParam; + CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibLocParam))); + } + } getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple); StringRef SYCLPostLinkOptions; if (Arg *A = Args.getLastArg(OPT_sycl_post_link_options_EQ)) diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index e7cff6c730051..5d0b8fbb2eed9 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -12,6 +12,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/Support/PropertySetIO.h" #include namespace llvm { @@ -36,6 +37,9 @@ PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, const GlobalBinImageProps &GlobProps); +PropSetRegTy computeSYCLDeviceLibProperties(const Module &M, + std::string &SYCLDeviceLibName); + std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h index c9b737e2d053a..8f3718c60ea88 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include namespace llvm { @@ -39,5 +41,7 @@ enum class DeviceLibExt : std::uint32_t { }; uint32_t getSYCLDeviceLibReqMask(const Module &M); - +void getSYCLDeviceLibReqNames(unsigned int ReqMask, + std::vector &ReqNames); +unsigned int getSYCLDeviceLibMeta(std::string &DeviceLibFn); } // namespace llvm diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index f93a6d605c8ce..c4002ceebe24b 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -200,6 +200,7 @@ class PropertySetRegistry { static constexpr char SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[] = "SYCL/specialization constants default values"; static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; + static constexpr char SYCL_DEVICELIB_METADATA[] = "SYCL/devicelib metadata"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties"; diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index c61728bc511fb..56736de52bef4 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -151,16 +151,37 @@ std::optional getKernelSingleEltMetadata(const Function &Func, return std::nullopt; } +PropSetRegTy computeSYCLDeviceLibProperties(const Module &M, + std::string &SYCLDeviceLibName) { + PropSetRegTy PropSet; + + { + auto SYCLDeviceLibMeta = getSYCLDeviceLibMeta(SYCLDeviceLibName); + std::map RMEntry = { + {"DeviceLibMetaData", SYCLDeviceLibMeta}}; + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_METADATA, RMEntry); + } + + { + for (const auto &F : M.functions()) { + if (!F.getName().starts_with("__devicelib_") || F.isDeclaration()) + continue; + if (F.getCallingConv() == CallingConv::SPIR_FUNC) { + PropSet.add(PropSetRegTy::SYCL_EXPORTED_SYMBOLS, F.getName(), + /*PropVal=*/true); + } + } + } + + return PropSet; +} + PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, const GlobalBinImageProps &GlobProps) { PropSetRegTy PropSet; - { - uint32_t MRMask = getSYCLDeviceLibReqMask(M); - std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); - } + { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, computeDeviceRequirements(M, EntryPoints).asMap()); @@ -461,7 +482,7 @@ PropSetRegTy computeModuleProperties(const Module &M, } PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, - "uses-virtual-functions-set", AllSets); + "uses-virtual-functions-set", AllSets); } } diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index 904424f93dae6..1617386c42b98 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -1402,6 +1402,9 @@ splitSYCLModule(std::unique_ptr M, ModuleSplitterSettings Settings) { } bool canBeImportedFunction(const Function &F) { + + if (F.getName().starts_with("__devicelib_") && F.isDeclaration()) + return true; // It may be theoretically possible to determine what is importable // based solely on function F, but the "SYCL/imported symbols" // property list MUST NOT have any imported symbols that are not supplied diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp index 12914d3763521..6a4f612028f80 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp @@ -757,6 +757,28 @@ uint32_t getDeviceLibBits(const std::string &FuncName) { } // namespace +static std::unordered_map> + SYCLDeviceLibFn = { + {DeviceLibExt::cl_intel_devicelib_assert, + {"libsycl-fallback-cassert.bc"}}, + {DeviceLibExt::cl_intel_devicelib_math, {"libsycl-fallback-cmath.bc"}}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + {"libsycl-fallback-cmath-fp64.bc"}}, + {DeviceLibExt::cl_intel_devicelib_complex, + {"libsycl-fallback-complex.bc"}}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, + {"libsycl-fallback-complex-fp64.bc"}}, + {DeviceLibExt::cl_intel_devicelib_cstring, + {"libsycl-fallback-cstring.bc"}}, + {DeviceLibExt::cl_intel_devicelib_imf, {"libsycl-fallback-imf.bc"}}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, + {"libsycl-fallback-imf-fp64.bc"}}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, + {"libsycl-fallback-imf-bf16.bc"}}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + {"libsycl-fallback-bfloat16.bc", "libsycl-native-bfloat16.bc"}}, +}; + // For each device image module, we go through all functions which meets // 1. The function name has prefix "__devicelib_" // 2. The function is declaration which means it doesn't have function body @@ -775,3 +797,55 @@ uint32_t llvm::getSYCLDeviceLibReqMask(const Module &M) { } return ReqMask; } + +void llvm::getSYCLDeviceLibReqNames(unsigned int ReqMask, + std::vector &ReqNames) { + DeviceLibExt DeviceLibExts[] = {DeviceLibExt::cl_intel_devicelib_assert, + DeviceLibExt::cl_intel_devicelib_math, + DeviceLibExt::cl_intel_devicelib_math_fp64, + DeviceLibExt::cl_intel_devicelib_complex, + DeviceLibExt::cl_intel_devicelib_complex_fp64, + DeviceLibExt::cl_intel_devicelib_cstring, + DeviceLibExt::cl_intel_devicelib_imf, + DeviceLibExt::cl_intel_devicelib_imf_fp64, + DeviceLibExt::cl_intel_devicelib_imf_bf16, + DeviceLibExt::cl_intel_devicelib_bfloat16}; + + unsigned int Temp; + for (auto Ext : DeviceLibExts) { + Temp = + 0x1 << (static_cast(Ext) - + static_cast(DeviceLibExt::cl_intel_devicelib_assert)); + if (Temp & ReqMask) { + for (auto Fn : SYCLDeviceLibFn[Ext]) + ReqNames.push_back(std::string(Fn)); + } + } +} + +unsigned int llvm::getSYCLDeviceLibMeta(std::string &DeviceLibFn) { + DeviceLibExt DeviceLibExts[] = {DeviceLibExt::cl_intel_devicelib_assert, + DeviceLibExt::cl_intel_devicelib_math, + DeviceLibExt::cl_intel_devicelib_math_fp64, + DeviceLibExt::cl_intel_devicelib_complex, + DeviceLibExt::cl_intel_devicelib_complex_fp64, + DeviceLibExt::cl_intel_devicelib_cstring, + DeviceLibExt::cl_intel_devicelib_imf, + DeviceLibExt::cl_intel_devicelib_imf_fp64, + DeviceLibExt::cl_intel_devicelib_imf_bf16, + DeviceLibExt::cl_intel_devicelib_bfloat16}; + + unsigned int DeviceLibMeta = 0; + for (auto Ext : DeviceLibExts) { + for (auto Fn : SYCLDeviceLibFn[Ext]) { + if (DeviceLibFn == Fn) { + DeviceLibMeta = static_cast(Ext); + if (DeviceLibFn == "libsycl-native-bfloat16.bc") + DeviceLibMeta |= 0x80000000; + break; + } + } + } + + return DeviceLibMeta; +} diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index ca8b014c97a0e..5daa8a5eea066 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -196,6 +196,7 @@ void PropertyValue::copy(const PropertyValue &P) { constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; +constexpr char PropertySetRegistry::SYCL_DEVICELIB_METADATA[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 91083cea614c7..a15add8482ca1 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -12,7 +12,6 @@ // - module splitter to split a big input module into smaller ones // - specialization constant intrinsic transformation //===----------------------------------------------------------------------===// - #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" @@ -104,6 +103,11 @@ cl::opt OutputDir{ "Directory where files listed in the result file table will be output"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; +cl::opt DeviceLibDir{ + "device-lib-dir", + cl::desc("Directory where sycl fallback device libraries reside"), + cl::value_desc("dirname"), cl::cat(PostLinkCat)}; + struct TargetFilenamePair { std::string Target; std::string Filename; @@ -306,9 +310,16 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) { std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, - StringRef Suff, StringRef Target = "") { - auto PropSet = - computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + StringRef Suff, StringRef Target = "", + bool IsDeviceLib = false) { + PropSetRegTy PropSet; + + // For fallback devicelib module, no kernel included and no specialization + // constant used, skip regular Prop emit. + if (!IsDeviceLib) + PropSet = computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + else + PropSet = computeSYCLDeviceLibProperties(MD.getModule(), MD.Name); // When the split mode is none, the required work group size will be added // to the whole module, which will make the runtime unable to @@ -425,17 +436,24 @@ void addTableRow(util::SimpleTable &Table, // IR component saving is skipped, and this file name is recorded as such in // the result. void saveModule(std::vector> &OutTables, - module_split::ModuleDesc &MD, int I, StringRef IRFilename) { + module_split::ModuleDesc &MD, int I, StringRef IRFilename, + bool IsDeviceLib = false) { IrPropSymFilenameTriple BaseTriple; StringRef Suffix = getModuleSuffix(MD); MD.saveSplitInformationAsMetadata(); - if (!IRFilename.empty()) { - // don't save IR, just record the filename - BaseTriple.Ir = IRFilename.str(); + if (!IsDeviceLib) { + if (!IRFilename.empty()) { + // don't save IR, just record the filename + BaseTriple.Ir = IRFilename.str(); + } else { + MD.cleanup(); + BaseTriple.Ir = saveModuleIR(MD.getModule(), I, Suffix); + } } else { - MD.cleanup(); + // For DeviceLib Modules, don't need to do clean up. BaseTriple.Ir = saveModuleIR(MD.getModule(), I, Suffix); } + if (DoSymGen) { // save the names of the entry points - the symbol table BaseTriple.Sym = saveModuleSymbolTable(MD, I, Suffix); @@ -449,13 +467,24 @@ void saveModule(std::vector> &OutTables, GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols, EmitImportedSymbols, DeviceGlobals}; - CopyTriple.Prop = - saveModuleProperties(MD, Props, I, Suffix, OutputFile.Target); + CopyTriple.Prop = saveModuleProperties(MD, Props, I, Suffix, + OutputFile.Target, IsDeviceLib); } addTableRow(*Table, CopyTriple); } } +void saveDeviceLibModule( + std::vector> &OutTables, + const std::string &IRFile, int I, LLVMContext &Context) { + SMDiagnostic Err; + StringRef DeviceLibLoc = DeviceLibDir; + std::string IRPath = DeviceLibLoc.str() + "/" + IRFile; + std::unique_ptr IRModule = parseIRFile(IRPath, Err, Context); + llvm::module_split::ModuleDesc LibIRMD(std::move(IRModule), IRFile); + saveModule(OutTables, LibIRMD, I, IRFile, true); +} + module_split::ModuleDesc link(module_split::ModuleDesc &&MD1, module_split::ModuleDesc &&MD2) { std::vector Names; @@ -751,7 +780,7 @@ bool isTargetCompatibleWithModule(const std::string &Target, } std::vector> -processInputModule(std::unique_ptr M) { +processInputModule(std::unique_ptr M, LLVMContext &Context) { // Construct the resulting table which will accumulate all the outputs. SmallVector ColumnTitles{ StringRef(COL_CODE)}; @@ -780,6 +809,9 @@ processInputModule(std::unique_ptr M) { // if none were made. bool Modified = false; + // Keeps track of required device libraries by all device images. + unsigned int DeviceLibReqMask = 0; + // Propagate ESIMD attribute to wrapper functions to prevent // spurious splits and kernel link errors. Modified |= runModulePass(*M); @@ -891,6 +923,7 @@ processInputModule(std::unique_ptr M) { "have been made\n"; } for (module_split::ModuleDesc &IrMD : MMs) { + DeviceLibReqMask |= getSYCLDeviceLibReqMask(IrMD.getModule()); saveModule(Tables, IrMD, ID, OutIRFileName); } @@ -899,12 +932,22 @@ processInputModule(std::unique_ptr M) { if (!MMsWithDefaultSpecConsts.empty()) { for (size_t i = 0; i != MMsWithDefaultSpecConsts.size(); ++i) { module_split::ModuleDesc &IrMD = MMsWithDefaultSpecConsts[i]; + DeviceLibReqMask |= getSYCLDeviceLibReqMask(IrMD.getModule()); saveModule(Tables, IrMD, ID, OutIRFileName); } ++ID; } } + + if ((DeviceLibReqMask > 0) && (DeviceLibDir.getNumOccurrences() > 0)) { + string_vector DeviceLibReqNames; + getSYCLDeviceLibReqNames(DeviceLibReqMask, DeviceLibReqNames); + for (auto Fn : DeviceLibReqNames) { + saveDeviceLibModule(Tables, Fn, ID, Context); + ++ID; + } + } return Tables; } @@ -1048,7 +1091,7 @@ int main(int argc, char **argv) { } std::vector> Tables = - processInputModule(std::move(M)); + processInputModule(std::move(M), Context); // Input module was processed and a single output file was requested. if (IROutputOnly) diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 40bf97299138f..f870642a6573c 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -46,6 +46,8 @@ "SYCL/specialization constants default values" /// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" +/// PropertySetRegistry::SYCL_DEVICELIB_METADATA defined in PropertySetIO.h +#define __SYCL_PROPERTY_SET_DEVICELIB_METADATA "SYCL/devicelib metadata" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" /// PropertySetRegistry::SYCL_KERNEL_PROGRAM_METADATA defined in PropertySetIO.h diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 633a4269e1e78..c89ce8c2d1808 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -180,6 +180,7 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { SpecConstDefaultValuesMap.init( Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); + DeviceLibMetaData.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_METADATA); KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); ImplicitLocalArg.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 9aab698ef2749..84e88b4a2a71e 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -215,6 +215,9 @@ class RTDeviceBinaryImage { return SpecConstDefaultValuesMap; } const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } + const PropertyRange &getDeviceLibMetaData() const { + return DeviceLibMetaData; + } const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } @@ -248,6 +251,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; + RTDeviceBinaryImage::PropertyRange DeviceLibMetaData; RTDeviceBinaryImage::PropertyRange KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange AssertUsed; RTDeviceBinaryImage::PropertyRange ProgramMetadata; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 035ca965ce2e5..00300b620c194 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -603,6 +603,74 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, return (0 == SuitableImageID); } +// For each extension understood by the SYCL runtime, the string representation +// of its name. Names with devicelib in them are internal to the runtime. Others +// are actual OpenCL extensions. +static const std::map DeviceLibExtensionStrs = { + {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, + {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + "cl_intel_devicelib_math_fp64"}, + {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, + "cl_intel_devicelib_complex_fp64"}, + {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, + {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + "cl_intel_bfloat16_conversions"}}; + +static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { + auto Ext = DeviceLibExtensionStrs.find(Extension); + if (Ext == DeviceLibExtensionStrs.end()) + throw exception(make_error_code(errc::build), + "Unhandled (new?) device library extension"); + return Ext->second; +} + +static bool skipFallbackSYCLDeviceLib(RTDeviceBinaryImage *Img, + const device &Dev) { + const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp = + Img->getDeviceLibMetaData(); + unsigned DeviceLibMetaData = + DeviceBinaryProperty(*(DeviceLibMetaProp.begin())).asUint32(); + DeviceLibExt LibExt = + static_cast(DeviceLibMetaData & 0x7FFFFFFF); + + std::vector DeviceExtensions = + Dev.get_info(); + + bool FP64Supported = + (std::find(DeviceExtensions.begin(), DeviceExtensions.end(), + "cl_khr_fp64") != DeviceExtensions.end()); + if ((LibExt == DeviceLibExt::cl_intel_devicelib_math_fp64 || + LibExt == DeviceLibExt::cl_intel_devicelib_complex_fp64 || + LibExt == DeviceLibExt::cl_intel_devicelib_imf_fp64) && + !FP64Supported) + return true; + + const char *ExtName = getDeviceLibExtensionStr(LibExt); + bool NativeSupported = + (std::find(DeviceExtensions.begin(), DeviceExtensions.end(), ExtName) != + DeviceExtensions.end()); + + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, ExtName) != nullptr; + } + + if (NativeSupported && !InhibitNativeImpl) + return true; + + if (LibExt == DeviceLibExt::cl_intel_devicelib_bfloat16) { + bool IsNative = ((DeviceLibMetaData & 0x80000000) != 0); + if (IsNative != NativeSupported) + return true; + } + return false; +} + static bool checkLinkingSupport(const device &Dev, const RTDeviceBinaryImage &Img) { const char *Target = Img.getRawData().DeviceTargetSpec; @@ -659,6 +727,9 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( !doesDevSupportDeviceRequirements(Dev, *Img) || !compatibleWithDevice(Img, Dev)) continue; + if (Img->getDeviceLibMetaData().isAvailable() && + skipFallbackSYCLDeviceLib(Img, Dev)) + continue; DeviceImagesToLink.insert(Img); Found = true; for (const sycl_device_binary_property &ISProp : @@ -768,7 +839,8 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } -static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +static inline void +CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) @@ -1227,32 +1299,6 @@ static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) { return Lib; } -// For each extension understood by the SYCL runtime, the string representation -// of its name. Names with devicelib in them are internal to the runtime. Others -// are actual OpenCL extensions. -static const std::map DeviceLibExtensionStrs = { - {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, - {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - "cl_intel_devicelib_math_fp64"}, - {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - "cl_intel_devicelib_complex_fp64"}, - {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, - {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - "cl_intel_bfloat16_conversions"}}; - -static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { - auto Ext = DeviceLibExtensionStrs.find(Extension); - if (Ext == DeviceLibExtensionStrs.end()) - throw exception(make_error_code(errc::build), - "Unhandled (new?) device library extension"); - return Ext->second; -} - static ur_result_t doCompile(const AdapterPtr &Adapter, ur_program_handle_t Program, uint32_t NumDevs, ur_device_handle_t *Devs, ur_context_handle_t Ctx, @@ -1809,8 +1855,14 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; // Treat the image as empty one - if (EntriesB == EntriesE) - continue; + if (EntriesB == EntriesE) { + std::unique_ptr Img = + std::make_unique(RawImg); + const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp = + Img->getDeviceLibMetaData(); + if (!DeviceLibMetaProp.isAvailable()) + continue; + } std::unique_ptr Img; if (isDeviceImageCompressed(RawImg))