From 3b2ba9f0a603d82673008c7c05826f38f9f8ea85 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 13 Nov 2024 15:03:57 +0800 Subject: [PATCH 01/37] [SYCL] Embed required fallback spv into final executable --- clang/lib/Driver/ToolChains/Clang.cpp | 1 + clang/lib/Driver/ToolChains/SYCL.cpp | 29 ++++++++++++++++++-- clang/lib/Driver/ToolChains/SYCL.h | 3 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 6 ++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f90ba124e5a09..494c81c743ce3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10921,6 +10921,7 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA, const llvm::opt::ArgList &TCArgs, ArgStringList &PostLinkArgs) { + // See if device code splitting is requested if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) { auto CodeSplitValue = StringRef(A->getValue()); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 7e8067f5ec2e4..cdef44ff472b5 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + #include "SYCL.h" #include "CommonArgs.h" #include "clang/Driver/Action.h" @@ -152,14 +153,36 @@ SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D) } void SYCLInstallationDetector::getSYCLDeviceLibPath( - llvm::SmallVector, 4> &DeviceLibPaths) const { + llvm::SmallVector, 4> &DeviceLibPaths, + bool GetSPV) const { + auto TargetTriple = llvm::Triple(D.getTargetTriple()); for (const auto &IC : InstallationCandidates) { llvm::SmallString<128> InstallLibPath(IC.str()); - InstallLibPath.append("/lib"); + if (GetSPV && TargetTriple.isOSWindows()) + 
InstallLibPath.append("/bin"); + else + InstallLibPath.append("/lib"); + DeviceLibPaths.emplace_back(InstallLibPath); + } + + if (!GetSPV || !TargetTriple.isOSWindows()) + DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); +} + +void SYCLInstallationDetector::getSYCLDeviceLibPath( + llvm::SmallVector, 4> &DeviceLibPaths, bool GetSPV) const { + auto TargetTriple = llvm::Triple(D.getTargetTriple()); + for (const auto &IC : InstallationCandidates) { + llvm::SmallString<128> InstallLibPath(IC.str()); + if (GetSPV && TargetTriple.isOSWindows()) + InstallLibPath.append("/bin"); + else + InstallLibPath.append("/lib"); DeviceLibPaths.emplace_back(InstallLibPath); } - DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); + if (!GetSPV || !TargetTriple.isOSWindows()) + DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); } void SYCLInstallationDetector::AddSYCLIncludeArgs( diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 538416e06171d..9982ea8caa16c 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -129,7 +129,8 @@ class SYCLInstallationDetector { public: SYCLInstallationDetector(const Driver &D); void getSYCLDeviceLibPath( - llvm::SmallVector, 4> &DeviceLibPaths) const; + llvm::SmallVector, 4> &DeviceLibPaths, + bool GetSPV = false) const; void AddSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; void print(llvm::raw_ostream &OS) const; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3800c5875e44f..b63a7668c97dd 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -104,6 +104,12 @@ cl::opt OutputDir{ "Directory where files listed in the result file table will be output"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; +cl::opt DeviceLibDir{ + "device-lib-dir", + cl::desc( + "Directory where sycl fallback device libraries reside"), + 
cl::value_desc("dirname"), cl::cat(PostLinkCat)}; + struct TargetFilenamePair { std::string Target; std::string Filename; From 78a7013fa301539062c9f8563e804b53bad02988 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 13 Nov 2024 15:07:27 +0800 Subject: [PATCH 02/37] remove function redefinition Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/SYCL.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 4ab405d7ba9fb..76878bbde13bf 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -169,22 +169,6 @@ void SYCLInstallationDetector::getSYCLDeviceLibPath( DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); } -void SYCLInstallationDetector::getSYCLDeviceLibPath( - llvm::SmallVector, 4> &DeviceLibPaths, bool GetSPV) const { - auto TargetTriple = llvm::Triple(D.getTargetTriple()); - for (const auto &IC : InstallationCandidates) { - llvm::SmallString<128> InstallLibPath(IC.str()); - if (GetSPV && TargetTriple.isOSWindows()) - InstallLibPath.append("/bin"); - else - InstallLibPath.append("/lib"); - DeviceLibPaths.emplace_back(InstallLibPath); - } - - if (!GetSPV || !TargetTriple.isOSWindows()) - DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); -} - void SYCLInstallationDetector::AddSYCLIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { // Add the SYCL header search locations in the specified order. 
From 243f7596174bd6548202770d715919adc72c922f Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 13 Nov 2024 15:17:32 +0800 Subject: [PATCH 03/37] fix clang format Signed-off-by: jinge90 --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index b63a7668c97dd..464f402baf85f 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -106,8 +106,7 @@ cl::opt OutputDir{ cl::opt DeviceLibDir{ "device-lib-dir", - cl::desc( - "Directory where sycl fallback device libraries reside"), + cl::desc("Directory where sycl fallback device libraries reside"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; struct TargetFilenamePair { From 9a2578023769c391d959317a9d10222d607c0698 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 13 Nov 2024 17:19:08 +0800 Subject: [PATCH 04/37] Pass devicelib path to sycl-post-link Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/Clang.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 494c81c743ce3..c47985761fdf7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10947,6 +10947,21 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (allowDeviceImageDependencies(TCArgs)) addArgs(PostLinkArgs, TCArgs, {"-allow-device-image-dependencies"}); + + SYCLInstallationDetector SYCLInstall(TC.getDriver()); + SmallVector, 4> SpvLocCandidates; + SmallString<128> FallbackAssertName("libsycl-fallback-cassert.spv"); + SYCLInstall.getSYCLDeviceLibPath(SpvLocCandidates, true); + for (const auto &SpvLoc : SpvLocCandidates) { + SmallString<128> FullLibName(SpvLoc); + llvm::sys::path::append(FullLibName, FallbackAssertName); + if (llvm::sys::fs::exists(FullLibName)) { + SmallString<128> 
SYCLDeviceLibDir("--device-lib-dir="); + SYCLDeviceLibDir += SpvLoc.str(); + addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()}); + break; + } + } } // On Intel targets we don't need non-kernel functions as entry points, From aa62091efa507a9478948a8c965e9e6b6a69f328 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 14 Nov 2024 11:39:02 +0800 Subject: [PATCH 05/37] pass sycl device lib spv dir to sycl-post-link in clang linker wrapper Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 14 ++++++++++++++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 ++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c47985761fdf7..81b7e4a1bc912 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10956,7 +10956,7 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, SmallString<128> FullLibName(SpvLoc); llvm::sys::path::append(FullLibName, FallbackAssertName); if (llvm::sys::fs::exists(FullLibName)) { - SmallString<128> SYCLDeviceLibDir("--device-lib-dir="); + SmallString<128> SYCLDeviceLibDir("--device-lib-spv-dir="); SYCLDeviceLibDir += SpvLoc.str(); addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()}); break; diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index ec883c1091196..9d0cfaa8a7209 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -727,6 +727,20 @@ runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args) { SmallVector CmdArgs; CmdArgs.push_back(*SYCLPostLinkPath); const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ); + if (SYCLDeviceLibLoc) { + std::string SYCLDeviceLibSPVLoc = 
SYCLDeviceLibLoc->getValue(); + llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); + if (HostTriple.isOSWindows()) + SYCLDeviceLibSPVLoc += "../bin"; + + std::string AssertSPVLoc = + SYCLDeviceLibSPVLoc + "/libsycl-fallback-cassert.spv"; + if (llvm::sys::fs::exists(AssertSPVLoc)) { + SYCLDeviceLibSPVLoc = "--device-lib-spv-dir=" + SYCLDeviceLibSPVLoc; + CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibSPVLoc))); + } + } getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple); StringRef SYCLPostLinkOptions; if (Arg *A = Args.getLastArg(OPT_sycl_post_link_options_EQ)) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 464f402baf85f..84ee5246c5647 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -105,8 +105,8 @@ cl::opt OutputDir{ cl::value_desc("dirname"), cl::cat(PostLinkCat)}; cl::opt DeviceLibDir{ - "device-lib-dir", - cl::desc("Directory where sycl fallback device libraries reside"), + "device-lib-spv-dir", + cl::desc("Directory where sycl fallback spirv device libraries reside"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; struct TargetFilenamePair { From b9b53ee8857af501fbd7fdfd75079f10b2d885ce Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 14 Nov 2024 16:37:01 +0800 Subject: [PATCH 06/37] Remove sycl devicelib require mask collection Signed-off-by: jinge90 --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.h | 3 +- ...eLibReqMask.h => SYCLRequiredDeviceLibs.h} | 7 +- llvm/include/llvm/Support/PropertySetIO.h | 2 +- llvm/lib/SYCLLowerIR/CMakeLists.txt | 2 +- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 14 ++-- ...ReqMask.cpp => SYCLRequiredDeviceLibs.cpp} | 69 ++++++++++--------- llvm/lib/Support/PropertySetIO.cpp | 2 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 8 ++- 8 files changed, 63 insertions(+), 44 deletions(-) rename llvm/include/llvm/SYCLLowerIR/{SYCLDeviceLibReqMask.h => SYCLRequiredDeviceLibs.h} 
(84%) rename llvm/lib/SYCLLowerIR/{SYCLDeviceLibReqMask.cpp => SYCLRequiredDeviceLibs.cpp} (96%) diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index e7cff6c730051..a1402f74faab9 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -34,7 +34,8 @@ using EntryPointSet = SetVector; PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps); + const GlobalBinImageProps &GlobProps, + const StringRef &DeviceLibSPVLoc); std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h b/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h similarity index 84% rename from llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h rename to llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h index c9b737e2d053a..1bebe00146d96 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h @@ -1,4 +1,4 @@ -//===----- SYCLDeviceLibReqMask.h - get SYCL devicelib required Info -----=-==// +//===----- SYCLRequiredDeviceLibs.h - get SYCL devicelib required Info ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -16,6 +16,8 @@ #pragma once +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -38,6 +40,7 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; -uint32_t getSYCLDeviceLibReqMask(const Module &M); +void getRequiredSYCLDeviceLibs(const Module &M, + SmallVector &ReqLibs); } // namespace llvm diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 13cb687f3b08b..86f91483a0c5d 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -199,7 +199,7 @@ class PropertySetRegistry { "SYCL/specialization constants"; static constexpr char SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[] = "SYCL/specialization constants default values"; - static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; + static constexpr char SYCL_DEVICELIB_REQ_BINS[] = "SYCL/devicelib req bins"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties"; diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 0ce2a91f91a29..bb2c230a2ea4b 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -61,7 +61,7 @@ add_llvm_component_library(LLVMSYCLLowerIR SYCLAddOptLevelAttribute.cpp SYCLConditionalCallOnDevice.cpp SYCLCreateNVVMAnnotations.cpp - SYCLDeviceLibReqMask.cpp + SYCLRequiredDeviceLibs.cpp SYCLDeviceRequirements.cpp SYCLKernelParamOptInfo.cpp SYCLJointMatrixTransform.cpp diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index cc287b9101fa8..7855a873852b4 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -17,12 +17,13 @@ #include "llvm/SYCLLowerIR/DeviceGlobals.h" 
#include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" +#include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" #include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" #include #include + #ifndef NDEBUG constexpr int DebugModuleProps = 0; #endif @@ -161,14 +162,17 @@ std::optional getKernelSingleEltMetadata(const Function &Func, PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps) { + const GlobalBinImageProps &GlobProps, + const StringRef &DeviceLibSPVLoc) { PropSetRegTy PropSet; { - uint32_t MRMask = getSYCLDeviceLibReqMask(M); - std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); + SmallVector RequiredLibs; + llvm::getRequiredSYCLDeviceLibs(M, RequiredLibs); + for (auto RL : RequiredLibs) { + } } + { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, computeDeviceRequirements(M, EntryPoints).asMap()); diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp similarity index 96% rename from llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp rename to llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp index 12914d3763521..ae5c9918ad184 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp @@ -1,4 +1,4 @@ -//==----- SYCLDeviceLibReqMask.cpp - get SYCL devicelib required Info ------==// +//==---- SYCLRequiredDeviceLibs.cpp - get SYCL devicelib required Info -----==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,7 +14,8 @@ // SYCL runtime later. 
//===----------------------------------------------------------------------===// -#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" +#include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/IR/Module.h" #include "llvm/TargetParser/Triple.h" @@ -732,46 +733,52 @@ SYCLDeviceLibFuncMap SDLMap = { DeviceLibExt::cl_intel_devicelib_bfloat16}, }; -// Each fallback device library corresponds to one bit in "require mask" which -// is an unsigned int32. getDeviceLibBit checks which fallback device library -// is required for FuncName and returns the corresponding bit. The corresponding -// mask for each fallback device library is: -// cl_intel_devicelib_assert: 0x1 -// cl_intel_devicelib_math: 0x2 -// cl_intel_devicelib_math_fp64: 0x4 -// cl_intel_devicelib_complex: 0x8 -// cl_intel_devicelib_complex_fp64: 0x10 -// cl_intel_devicelib_cstring : 0x20 -// cl_intel_devicelib_imf: 0x40 -// cl_intel_devicelib_imf_fp64: 0x80 -// cl_intel_devicelib_imf_bf16: 0x100 -// cl_intel_devicelib_bfloat16: 0x200 -uint32_t getDeviceLibBits(const std::string &FuncName) { - auto DeviceLibFuncIter = SDLMap.find(FuncName); - return ((DeviceLibFuncIter == SDLMap.end()) - ? 0 - : 0x1 << (static_cast(DeviceLibFuncIter->second) - - static_cast( - DeviceLibExt::cl_intel_devicelib_assert))); -} - } // namespace +// Each fallback device library corresponds to one SPV file whose name is kept +// in DeviceLibSPVExtMap. 
+static std::unordered_map DeviceLibSPVExtMap = { + {DeviceLibExt::cl_intel_devicelib_assert, "libsycl-fallback-cassert.spv"}, + {DeviceLibExt::cl_intel_devicelib_math, "libsycl-fallback-cmath.spv"}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + "libsycl-fallback-cmath-fp64.spv"}, + {DeviceLibExt::cl_intel_devicelib_complex, "libsycl-fallback-complex.spv"}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, + "libsycl-fallback-complex-fp64.spv"}, + {DeviceLibExt::cl_intel_devicelib_cstring, "libsycl-fallback-cstring.spv"}, + {DeviceLibExt::cl_intel_devicelib_imf, "libsycl-fallback-imf.spv"}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, + "libsycl-fallback-imf-fp64.spv"}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, + "libsycl-fallback-imf-bf16.spv"}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + "libsycl-fallback-bfloat16.spv"}}; + +namespace llvm { // For each device image module, we go through all functions which meets // 1. The function name has prefix "__devicelib_" // 2. The function is declaration which means it doesn't have function body // And we don't expect non-spirv functions with "__devicelib_" prefix. -uint32_t llvm::getSYCLDeviceLibReqMask(const Module &M) { +void getRequiredSYCLDeviceLibs( + const Module &M, llvm::SmallVector &ReqDeviceLibs) { // Device libraries will be enabled only for spir-v module. 
if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) - return 0; - uint32_t ReqMask = 0; + return; + + SmallSet DeviceLibUsed; for (const Function &SF : M) { if (SF.getName().starts_with(DEVICELIB_FUNC_PREFIX) && SF.isDeclaration()) { assert(SF.getCallingConv() == CallingConv::SPIR_FUNC); - uint32_t DeviceLibBits = getDeviceLibBits(SF.getName().str()); - ReqMask |= DeviceLibBits; + auto DeviceLibFuncIter = SDLMap.find(SF.getName().str()); + if (DeviceLibFuncIter == SDLMap.end()) + continue; + if (DeviceLibUsed.contains(DeviceLibFuncIter->second)) + continue; + + DeviceLibUsed.insert(DeviceLibFuncIter->second); + ReqDeviceLibs.push_back(DeviceLibSPVExtMap[DeviceLibFuncIter->second]); } } - return ReqMask; } + +} // namespace llvm diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index 2fe7cac00fb14..edc46dcbb0c13 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -195,7 +195,7 @@ void PropertyValue::copy(const PropertyValue &P) { } constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; -constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; +constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_BINS[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 84ee5246c5647..5c8ee7bbe466b 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -104,7 +104,7 @@ cl::opt OutputDir{ "Directory where files listed in the result file table will be output"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; -cl::opt DeviceLibDir{ +cl::opt DeviceLibSPVDir{ "device-lib-spv-dir", cl::desc("Directory where sycl fallback spirv device libraries reside"), cl::value_desc("dirname"), 
cl::cat(PostLinkCat)}; @@ -312,8 +312,12 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) { std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, StringRef Suff, StringRef Target = "") { + + StringRef SPVDir = ""; + if (DeviceLibSPVDir.getNumOccurrences() > 0) + SPVDir = DeviceLibSPVDir; auto PropSet = - computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + computeModuleProperties(MD.getModule(), MD.entries(), GlobProps, SPVDir); std::string NewSuff = Suff.str(); if (!Target.empty()) { From 5e907b2414a25f09e898a06ce98689bf954b6df5 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 14 Nov 2024 16:44:17 +0800 Subject: [PATCH 07/37] fix clang format Signed-off-by: jinge90 --- llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 7855a873852b4..6df9bb9f5c487 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -17,8 +17,8 @@ #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" -#include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" #include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" +#include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" #include #include @@ -466,7 +466,7 @@ PropSetRegTy computeModuleProperties(const Module &M, } PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, - "uses-virtual-functions-set", AllSets); + "uses-virtual-functions-set", AllSets); } } From 35955252d15d3960f3659a5f0e8b2076e7922196 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 18 Nov 2024 14:02:33 +0800 Subject: [PATCH 08/37] Embed required fallback spv in sycl-post-link Signed-off-by: jinge90 ---
.../llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h | 16 +++++++++++- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 26 +++++++++++++++++-- .../SYCLLowerIR/SYCLRequiredDeviceLibs.cpp | 7 +++-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h b/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h index 1bebe00146d96..ce518e57489c6 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h @@ -40,7 +40,21 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; +struct SYCLDeviceLibSPVBinary { + typedef uint8_t value_type; + value_type *SPVRawBytes; + size_t SPVBytesNum; + SYCLDeviceLibSPVBinary(value_type *RawB, size_t BNum) { + SPVRawBytes = RawB; + SPVBytesNum = BNum; + } + value_type *data() const { return SPVRawBytes; } + size_t size() const { return SPVBytesNum; } +}; + void getRequiredSYCLDeviceLibs(const Module &M, - SmallVector &ReqLibs); + SmallVector &ReqLibs); + +const char *getDeviceLibFileName(DeviceLibExt RequiredDeviceLibExt); } // namespace llvm diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 6df9bb9f5c487..5b49a388d8475 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -21,6 +21,8 @@ #include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" #include #include @@ -167,9 +169,29 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSetRegTy PropSet; { - SmallVector RequiredLibs; + SmallVector RequiredLibs; llvm::getRequiredSYCLDeviceLibs(M, RequiredLibs); - for (auto RL : RequiredLibs) { + for (auto Ext : RequiredLibs) { + const char *SPVFileName = llvm::getDeviceLibFileName(Ext); + std::string 
SPVPath = + DeviceLibSPVLoc.str() + "/" + std::string(SPVFileName); + if (!llvm::sys::fs::exists(SPVPath)) + continue; + + auto SPVMB = llvm::MemoryBuffer::getFile(SPVPath); + if (!SPVMB) + continue; + + size_t SPVSize = (*SPVMB)->getBufferSize(); + uint8_t *SPVBuffer = reinterpret_cast( + std::aligned_alloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); + *(reinterpret_cast(SPVBuffer)) = static_cast(Ext); + std::memcpy(SPVBuffer + 1, (*SPVMB)->getBufferStart(), SPVSize); + llvm::SYCLDeviceLibSPVBinary SPVBinaryObj(SPVBuffer, + SPVSize + sizeof(uint32_t)); + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, SPVFileName, + SPVBinaryObj); + std::free(SPVBuffer); } } diff --git a/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp b/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp index ae5c9918ad184..38960cdc9c239 100644 --- a/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp @@ -760,7 +760,7 @@ namespace llvm { // 2. The function is declaration which means it doesn't have function body // And we don't expect non-spirv functions with "__devicelib_" prefix. void getRequiredSYCLDeviceLibs( - const Module &M, llvm::SmallVector &ReqDeviceLibs) { + const Module &M, llvm::SmallVector &ReqDeviceLibs) { // Device libraries will be enabled only for spir-v module. 
if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) return; @@ -776,9 +776,12 @@ void getRequiredSYCLDeviceLibs( continue; DeviceLibUsed.insert(DeviceLibFuncIter->second); - ReqDeviceLibs.push_back(DeviceLibSPVExtMap[DeviceLibFuncIter->second]); + ReqDeviceLibs.push_back(DeviceLibFuncIter->second); } } } +const char *getDeviceLibFileName(DeviceLibExt RequiredDeviceLibExt) { + return DeviceLibSPVExtMap[RequiredDeviceLibExt]; +} } // namespace llvm From f0ecc0aa99141d66439275c23bbc7728e1f06a3b Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 21 Nov 2024 15:21:13 +0800 Subject: [PATCH 09/37] Make the SYCL runtime compatible with binaries built by legacy compilers that used the devicelib required mask. Signed-off-by: jinge90 --- sycl/source/detail/compiler.hpp | 7 +- sycl/source/detail/device_binary_image.cpp | 3 + sycl/source/detail/device_binary_image.hpp | 4 + .../program_manager/program_manager.cpp | 114 +++++++++++++----- .../program_manager/program_manager.hpp | 4 +- 5 files changed, 100 insertions(+), 32 deletions(-) diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 35f313ceec3f5..42e0981ccc109 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -44,8 +44,13 @@ /// PropertySetIO.h #define __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP \ "SYCL/specialization constants default values" -/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h +/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK is defined in legacy version +/// compiler, we keep this property here and to be compatible with program +/// built with these legacy version compiler. +/// TODO: clear this deprecated property when users upgrade to latest compiler.
#define __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" +/// PropertySetRegistry::SYCL_DEVICELIB_REQ_BINS defined in PropertySetIO.h +#define __SYCL_PROPERTY_SET_DEVICELIB_REQ_BINS "SYCL/devicelib req bins" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" /// PropertySetRegistry::SYCL_KERNEL_PROGRAM_METADATA defined in PropertySetIO.h diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 2be48d4a38fce..65a2dedf2d0e8 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -179,7 +179,10 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); + // TODO: clear deprecated DeviceLibReqMask when developers upgrade to latest + // compiler. DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); + DeviceLibReqBins.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_BINS); KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 203427b89ca45..d6eb4db7c83c2 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -213,7 +213,10 @@ class RTDeviceBinaryImage { const PropertyRange &getSpecConstantsDefaultValues() const { return SpecConstDefaultValuesMap; } + // TODO: clear deprecated DeviceLibReqMask when developers upgrade to latest + // version compiler. 
const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } + const PropertyRange &getDeviceLibReqBins() const { return DeviceLibReqBins; } const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } @@ -246,6 +249,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; + RTDeviceBinaryImage::PropertyRange DeviceLibReqBins; RTDeviceBinaryImage::PropertyRange KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange AssertUsed; RTDeviceBinaryImage::PropertyRange ProgramMetadata; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8f13c0745ad21..b57ac28c54997 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -755,7 +755,8 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } -static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +static inline void +CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) @@ -844,18 +845,18 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( programReleaseInfo.getFuncPtrFromModule(ur::getURLoaderLibrary()); ProgramPtr ProgramManaged(NativePrg, programRelease); + std::vector ImagesVec; // Link a fallback implementation of device libraries if they are not // supported by a device compiler. // Pre-compiled programs (after AOT compilation or read from persitent // cache) are supposed to be already linked. // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means // no fallback device library will be linked. 
- uint32_t DeviceLibReqMask = 0; bool UseDeviceLibs = !DeviceCodeWasInCache && Img.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && !SYCLConfig::get(); if (UseDeviceLibs) - DeviceLibReqMask = getDeviceLibReqMask(Img); + ImagesVec.push_back(&Img); std::vector ProgramsToLink; // If we had a program in cache, then it should have been the fully linked @@ -863,7 +864,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( if (!DeviceCodeWasInCache) { for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) { if (UseDeviceLibs) - DeviceLibReqMask |= getDeviceLibReqMask(*BinImg); + ImagesVec.push_back(BinImg); device_image_plain DevImagePlain = getDeviceImageFromBinaryImage(BinImg, Context, Device); const std::shared_ptr &DeviceImageImpl = @@ -883,10 +884,10 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( } std::vector Devs = { getSyclObjImpl(Device).get()->getHandleRef()}; - ; + ProgramPtr BuiltProgram = build( std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, Devs, - DeviceLibReqMask, ProgramsToLink, + ImagesVec, ProgramsToLink, /*CreatedFromBinary*/ Img.getFormat() != SYCL_DEVICE_BINARY_TYPE_SPIRV); // Those extra programs won't be used anymore, just the final linked result for (ur_program_handle_t Prg : ProgramsToLink) @@ -1505,9 +1506,9 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { } static std::vector -getDeviceLibPrograms(const ContextImplPtr Context, - std::vector &Devices, - uint32_t DeviceLibReqMask) { +getDeviceLibProgramsLegacy(const ContextImplPtr Context, + std::vector &Devices, + uint32_t DeviceLibReqMask) { std::vector Programs; std::pair RequiredDeviceLibExt[] = { @@ -1590,6 +1591,57 @@ getDeviceLibPrograms(const ContextImplPtr Context, return Programs; } +static std::vector +getDeviceLibPrograms(const ContextImplPtr Context, + std::vector &Devices, + const std::vector &Images) { + std::vector Programs; + return Programs; +} + +static void +checkDeviceLibsLinkMode(const std::vector &Images, + 
bool &LinkDeviceLib, bool &LegacyLinkMode) { + bool ReqMaskAvailable = false, ReqBinsAvailable = false; + for (auto Img : Images) { + const RTDeviceBinaryImage::PropertyRange &LegacyRange = + Img->getDeviceLibReqMask(); + if (LegacyRange.isAvailable()) { + ReqMaskAvailable = true; + continue; + } + + const RTDeviceBinaryImage::PropertyRange &NewRange = + Img->getDeviceLibReqBins(); + if (NewRange.isAvailable()) + ReqBinsAvailable = true; + } + + // If both ReqBins and ReqMask are available, it means user's device image + // and the images in cache are built with different version compiler, we + // don't support such scenario. + if ((!ReqMaskAvailable && !ReqBinsAvailable) || + (ReqMaskAvailable && ReqBinsAvailable)) { + LinkDeviceLib = false; + return; + } + + LinkDeviceLib = true; + LegacyLinkMode = ReqMaskAvailable; +} + +static uint32_t getDeviceLibReqMaskFromImages( + const std::vector &Images) { + uint32_t DeviceLibReqMask = 0; + for (auto Img : Images) { + const RTDeviceBinaryImage::PropertyRange &ReqMaskRange = + Img->getDeviceLibReqMask(); + if (ReqMaskRange.isAvailable()) + DeviceLibReqMask |= DeviceBinaryProperty(*(ReqMaskRange.begin())).asUint32(); + } + return DeviceLibReqMask; +} + // Check if device image is compressed. 
static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { @@ -1600,19 +1652,25 @@ static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { ProgramManager::ProgramPtr ProgramManager::build( ProgramPtr Program, const ContextImplPtr Context, const std::string &CompileOptions, const std::string &LinkOptions, - std::vector &Devices, uint32_t DeviceLibReqMask, + std::vector &Devices, + const std::vector &Images, const std::vector &ExtraProgramsToLink, bool CreatedFromBinary) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " << CompileOptions << ", " << LinkOptions << ", " - << VecToString(Devices) << ", " << std::hex << DeviceLibReqMask - << std::dec << ", " << VecToString(ExtraProgramsToLink) << ", " - << CreatedFromBinary << ")\n"; + << VecToString(Devices) << ", " << std::dec << ", " + << VecToString(ExtraProgramsToLink) << ", " << CreatedFromBinary + << ")\n"; } - bool LinkDeviceLibs = (DeviceLibReqMask != 0); + bool LinkDeviceLibs = false; + bool LegacyDeviceLibLinkMode = false; + if (Images.size() == 0) + LinkDeviceLibs = false; + else + checkDeviceLibsLinkMode(Images, LinkDeviceLibs, LegacyDeviceLibLinkMode); // TODO: this is a temporary workaround for GPU tests for ESIMD compiler. 
// We do not link with other device libraries, because it may fail @@ -1623,7 +1681,13 @@ ProgramManager::ProgramPtr ProgramManager::build( std::vector LinkPrograms; if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, DeviceLibReqMask); + if (LegacyDeviceLibLinkMode) { + uint32_t DeviceLibReqMask = getDeviceLibReqMaskFromImages(Images); + LinkPrograms = + getDeviceLibProgramsLegacy(Context, Devices, DeviceLibReqMask); + } else { + LinkPrograms = getDeviceLibPrograms(Context, Devices, Images); + } } static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); @@ -1939,15 +2003,6 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, F.close(); } -uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { - const RTDeviceBinaryImage::PropertyRange &DLMRange = - Img.getDeviceLibReqMask(); - if (DLMRange.isAvailable()) - return DeviceBinaryProperty(*(DLMRange.begin())).asUint32(); - else - return 0x0; -} - const KernelArgMask * ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, const std::string &KernelName) { @@ -2640,10 +2695,10 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, // Pre-compiled programs are supposed to be already linked. // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means // no fallback device library will be linked. 
- uint32_t DeviceLibReqMask = 0; + std::vector ImagesVec; if (Img.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && !SYCLConfig::get()) - DeviceLibReqMask = getDeviceLibReqMask(Img); + ImagesVec.push_back(&Img); // TODO: Add support for dynamic linking with kernel bundles std::vector ExtraProgramsToLink; @@ -2653,7 +2708,7 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, } ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, - URDevices, DeviceLibReqMask, ExtraProgramsToLink); + URDevices, ImagesVec, ExtraProgramsToLink); emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl); @@ -2869,10 +2924,13 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( // No linking of extra programs reqruired. std::vector ExtraProgramsToLink; std::vector Devs = {DeviceImpl->getHandleRef()}; + std::vector ImagesVec; + // For non-spirv target, we don't need to link any fallback device library. + // An empty images vector will skip linking fallback device libraries. auto BuildProgram = build(std::move(ProgramManaged), detail::getSyclObjImpl(Context), CompileOpts, LinkOpts, Devs, - /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0, + ImagesVec, ExtraProgramsToLink); ur_kernel_handle_t UrKernel{nullptr}; Adapter->call( diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 0586a41a83540..9fa6e103c6af7 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -177,8 +177,6 @@ class ProgramManager { static std::string getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context); - uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); - /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program. /// \param NativePrg the UR program associated with the kernel. 
@@ -307,7 +305,7 @@ class ProgramManager { const std::string &CompileOptions, const std::string &LinkOptions, std::vector &Devices, - uint32_t DeviceLibReqMask, + const std::vector &Images, const std::vector &ProgramsToLink, bool CreatedFromBinary = false); From 352eea35423b0433bd3738aa27933e66af19e984 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 21 Nov 2024 17:17:40 +0800 Subject: [PATCH 10/37] fix bug when writing required devicelib extension to image Signed-off-by: jinge90 --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 3 +- .../program_manager/program_manager.cpp | 30 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 5b49a388d8475..59ba579a2691a 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -186,7 +186,8 @@ PropSetRegTy computeModuleProperties(const Module &M, uint8_t *SPVBuffer = reinterpret_cast( std::aligned_alloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); *(reinterpret_cast(SPVBuffer)) = static_cast(Ext); - std::memcpy(SPVBuffer + 1, (*SPVMB)->getBufferStart(), SPVSize); + std::memcpy(SPVBuffer + sizeof(uint32_t), (*SPVMB)->getBufferStart(), + SPVSize); llvm::SYCLDeviceLibSPVBinary SPVBinaryObj(SPVBuffer, SPVSize + sizeof(uint32_t)); PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, SPVFileName, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index b57ac28c54997..7212b248bf6a6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1596,6 +1596,25 @@ getDeviceLibPrograms(const ContextImplPtr Context, std::vector &Devices, const std::vector &Images) { std::vector Programs; + for (auto Img : Images) { + if (!Img) + continue; + const RTDeviceBinaryImage::PropertyRange &NewRange = + 
Img->getDeviceLibReqBins(); + if (!NewRange.isAvailable()) + continue; + + for (const auto &DeviceLibBinProp : NewRange) { + auto DeviceLibByteArray = + DeviceBinaryProperty(DeviceLibBinProp).asByteArray(); + DeviceLibByteArray.dropBytes(8); + uint32_t DeviceLibExtReq = + (static_cast(DeviceLibByteArray[3]) << 24) | + (static_cast(DeviceLibByteArray[2]) << 16) | + (static_cast(DeviceLibByteArray[1]) << 8) | + DeviceLibByteArray[0]; + } + } return Programs; } @@ -1604,6 +1623,8 @@ checkDeviceLibsLinkMode(const std::vector &Images, bool &LinkDeviceLib, bool &LegacyLinkMode) { bool ReqMaskAvailable = false, ReqBinsAvailable = false; for (auto Img : Images) { + if (!Img) + continue; const RTDeviceBinaryImage::PropertyRange &LegacyRange = Img->getDeviceLibReqMask(); if (LegacyRange.isAvailable()) { @@ -1634,10 +1655,13 @@ static uint32_t getDeviceLibReqMaskFromImages( const std::vector &Images) { uint32_t DeviceLibReqMask = 0; for (auto Img : Images) { + if (!Img) + continue; const RTDeviceBinaryImage::PropertyRange &ReqMaskRange = Img->getDeviceLibReqMask(); if (ReqMaskRange.isAvailable()) - DeviceLibReqMask |= DeviceBinaryProperty(*(ReqMaskRange.begin())).asUint32(); + DeviceLibReqMask |= + DeviceBinaryProperty(*(ReqMaskRange.begin())).asUint32(); } return DeviceLibReqMask; } @@ -2929,9 +2953,7 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( // An empty images vector will skip linking fallback device libraries. 
auto BuildProgram = build(std::move(ProgramManaged), detail::getSyclObjImpl(Context), - CompileOpts, LinkOpts, Devs, - ImagesVec, - ExtraProgramsToLink); + CompileOpts, LinkOpts, Devs, ImagesVec, ExtraProgramsToLink); ur_kernel_handle_t UrKernel{nullptr}; Adapter->call( BuildProgram.get(), KernelName.c_str(), &UrKernel); From 331644eb75f96005dcf11f93d9ac6396eb213a1e Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 25 Nov 2024 16:24:45 +0800 Subject: [PATCH 11/37] extract required spv binary from exe Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 112 ++++++++++++++++-- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 30c86fc3aac28..918361d688e5d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -1112,8 +1113,8 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, // TODO device libraries may use scpecialization constants, manifest files, etc. // To support that they need to be delivered in a different container - so that // sycl_device_binary_struct can be created for each of them. 
-static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, - ur_program_handle_t &Prog) { +static bool loadDeviceLibLegacy(const ContextImplPtr Context, const char *Name, + ur_program_handle_t &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, std::ifstream::in | std::ifstream::binary); @@ -1133,6 +1134,13 @@ static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, return Prog != nullptr; } +static bool loadDeviceLib(const ContextImplPtr Context, + ur_program_handle_t &Prog, + const unsigned char *SPVBuffer, size_t SPVSize) { + Prog = createSpirvProgram(Context, SPVBuffer, SPVSize); + return Prog != nullptr; +} + // For each extension, a pair of library names. The first uses native support, // the second emulates functionality in software. static const std::map> @@ -1213,9 +1221,13 @@ static ur_result_t doCompile(const AdapterPtr &Adapter, static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, std::vector &Devices, - bool UseNativeLib) { + bool UseNativeLib, bool LegacyMode = true, + const unsigned char *SPVBuffer = nullptr, + size_t SPVSize = 0) { - auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); + const char *LibFileName = nullptr; + if (LegacyMode) + LibFileName = getDeviceLibFilename(Extension, UseNativeLib); auto LockedCache = Context->acquireCachedLibPrograms(); auto &CachedLibPrograms = LockedCache.get(); // Collect list of devices to compile the library for. Library was already @@ -1252,10 +1264,20 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, bool IsProgramCreated = !URProgram; // Create UR program for device lib if we don't have it yet. 
- if (!URProgram && !loadDeviceLib(Context, LibFileName, URProgram)) { - EraseProgramForDevices(); - throw exception(make_error_code(errc::build), - std::string("Failed to load ") + LibFileName); + if (LegacyMode) { + if (!URProgram && !loadDeviceLibLegacy(Context, LibFileName, URProgram)) { + EraseProgramForDevices(); + throw exception(make_error_code(errc::build), + std::string("Failed to load ") + LibFileName); + } + } else { + if (!URProgram && !loadDeviceLib(Context, URProgram, SPVBuffer, SPVSize)) { + EraseProgramForDevices(); + const char *ExtStr = getDeviceLibExtensionStr(Extension); + throw exception( + make_error_code(errc::build), + std::string("Failed to load fallback device library for ") + ExtStr); + } } // Insert URProgram into the cache for all devices that we compiled it for. @@ -1513,6 +1535,8 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { return ((DeviceLibReqMask & Mask) == Mask); } +// TODO: Clear legacy getDeviceLibPrograms when developers upgrade to +// latest version compiler. static std::vector getDeviceLibProgramsLegacy(const ContextImplPtr Context, std::vector &Devices, @@ -1604,6 +1628,38 @@ getDeviceLibPrograms(const ContextImplPtr Context, std::vector &Devices, const std::vector &Images) { std::vector Programs; + std::map DeviceLibExtLoaded = { + {DeviceLibExt::cl_intel_devicelib_assert, + /* is fallback loaded? */ false}, + {DeviceLibExt::cl_intel_devicelib_math, false}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, + {DeviceLibExt::cl_intel_devicelib_complex, false}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}, + {DeviceLibExt::cl_intel_devicelib_cstring, false}, + {DeviceLibExt::cl_intel_devicelib_imf, false}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, false}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, false}}; + + // Check whether a specified extension is supported by ALL devices. 
+ auto checkExtForDevices = [&Context, &Devices](const char *ExtStr) -> bool { + bool ExtAvailable = true; + for (auto SingleDevice : Devices) { + std::string DevExtList = + Context->getPlatformImpl() + ->getDeviceImpl(SingleDevice) + ->get_device_info_string( + UrInfoCode::value); + if (DevExtList.npos == DevExtList.find(ExtStr)) { + ExtAvailable = false; + break; + } + } + return ExtAvailable; + }; + + const bool fp64Support = checkExtForDevices("cl_khr_fp64"); + for (auto Img : Images) { if (!Img) continue; @@ -1616,11 +1672,47 @@ getDeviceLibPrograms(const ContextImplPtr Context, auto DeviceLibByteArray = DeviceBinaryProperty(DeviceLibBinProp).asByteArray(); DeviceLibByteArray.dropBytes(8); - uint32_t DeviceLibExtReq = + DeviceLibExt DeviceLibExtReq = static_cast( (static_cast(DeviceLibByteArray[3]) << 24) | (static_cast(DeviceLibByteArray[2]) << 16) | (static_cast(DeviceLibByteArray[1]) << 8) | - DeviceLibByteArray[0]; + DeviceLibByteArray[0]); + if (DeviceLibExtLoaded.count(DeviceLibExtReq) != 1) { + if constexpr (DbgProgMgr > 0) { + std::cerr << "Unknown DeviceLib extension(" + << static_cast(DeviceLibExtReq) << ")!" + << std::endl; + } + continue; + } + + if (DeviceLibExtLoaded[DeviceLibExtReq]) + continue; + + if ((DeviceLibExtReq == DeviceLibExt::cl_intel_devicelib_math_fp64 || + DeviceLibExtReq == DeviceLibExt::cl_intel_devicelib_complex_fp64 || + DeviceLibExtReq == DeviceLibExt::cl_intel_devicelib_imf_fp64) && + !fp64Support) + continue; + + auto DeviceLibExtReqName = getDeviceLibExtensionStr(DeviceLibExtReq); + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, DeviceLibExtReqName) != nullptr; + } + + bool ExtReqAvailable = checkExtForDevices(DeviceLibExtReqName); + + // Load fallback device library only when 1) or 2) is met: + // 1. underlying device doesn't support the extension + // 2. 
user explicitly ask to inhibit usage of native support + if (!ExtReqAvailable || InhibitNativeImpl) { + DeviceLibByteArray.dropBytes(4); + Programs.push_back(loadDeviceLibFallback( + Context, DeviceLibExtReq, Devices, + /*UseNativeLib=*/false, false, DeviceLibByteArray.begin(), + DeviceLibByteArray.size())); + } } } return Programs; From 69c6f7dffad0f35081f0618224220e4fef74d09c Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 25 Nov 2024 17:04:14 +0800 Subject: [PATCH 12/37] workaround for std::aligned_alloc Signed-off-by: jinge90 --- llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 59ba579a2691a..e52daac756ddf 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -183,8 +183,14 @@ PropSetRegTy computeModuleProperties(const Module &M, continue; size_t SPVSize = (*SPVMB)->getBufferSize(); + // std::aligned_alloc is not available in some pre-ci Windows machine. 
+#if defined(_WIN32) || defined(_WIN64) + uint8_t *SPVBuffer = reinterpret_cast( + _aligned_malloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); +#else uint8_t *SPVBuffer = reinterpret_cast( std::aligned_alloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); +#endif *(reinterpret_cast(SPVBuffer)) = static_cast(Ext); std::memcpy(SPVBuffer + sizeof(uint32_t), (*SPVMB)->getBufferStart(), SPVSize); @@ -192,7 +198,11 @@ PropSetRegTy computeModuleProperties(const Module &M, SPVSize + sizeof(uint32_t)); PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, SPVFileName, SPVBinaryObj); +#if defined(_WIN32) || defined(_WIN64) + _aligned_free(SPVBUffer); +#else std::free(SPVBuffer); +#endif } } From 07656ea211b6b5e3e6d9aef414c8d94a12b4977f Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 25 Nov 2024 17:23:15 +0800 Subject: [PATCH 13/37] fix typo Signed-off-by: jinge90 --- llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index e52daac756ddf..7c7bbf505a63f 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -199,7 +199,7 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, SPVFileName, SPVBinaryObj); #if defined(_WIN32) || defined(_WIN64) - _aligned_free(SPVBUffer); + _aligned_free(SPVBuffer); #else std::free(SPVBuffer); #endif From 77b0fbf412f9b0142338e772eb0dbeb5c1d45e0e Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 11:37:58 +0800 Subject: [PATCH 14/37] fix sycl-linker-wrapper-image.cpp lit failure Signed-off-by: jinge90 --- clang/test/Driver/sycl-linker-wrapper-image.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index 
fb81abd4b7ef0..df05d49b939b5 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -44,21 +44,18 @@ int main() { // CHECK-DAG: @.sycl_offloading.target.0 = internal unnamed_addr constant [7 x i8] c"spir64\00" // CHECK-DAG: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer // CHECK-DAG: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer -// CHECK-DAG: @prop = internal unnamed_addr constant [17 x i8] c"DeviceLibReqMask\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr null, i32 1, i64 0 }] -// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [24 x i8] c"SYCL/devicelib req mask\00" -// CHECK-DAG: @prop.1 = internal unnamed_addr constant [8 x i8] c"aspects\00" +// CHECK-DAG: @prop = internal unnamed_addr constant [8 x i8] c"aspects\00" // CHECK-DAG: @prop_val = internal unnamed_addr constant [8 x i8] zeroinitializer -// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop.1, ptr @prop_val, i32 2, i64 8 }] -// CHECK-DAG: @SYCL_PropSetName.3 = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" -// CHECK-DAG: @SYCL_PropSetName.4 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr 
@__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }] +// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr @prop_val, i32 2, i64 8 }] +// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" +// CHECK-DAG: @SYCL_PropSetName.1 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" +// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [2 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.1, ptr null, ptr null }] // CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [772 x i8] // CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00" // CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }] // CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 772], section ".tgtimg", align 16 // CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata" -// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([772 x i8], ptr 
@.sycl_offloading.0.data, i64 0, i64 772), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] +// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([772 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 772), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([2 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 2) }] // CHECK-DAG: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null } // CHECK-DAG: @llvm.global_ctors = {{.*}} { i32 1, ptr @sycl.descriptor_reg, ptr null }] // CHECK-DAG: @llvm.global_dtors = {{.*}} { i32 1, ptr @sycl.descriptor_unreg, ptr null }] From 8878ffc1c634ac8da56be1cbb0c12ecb8f886ca0 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 12:27:03 +0800 Subject: [PATCH 15/37] fix sycl-post-link-options lit failure Signed-off-by: jinge90 --- clang/test/Driver/sycl-post-link-options.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-post-link-options.cpp b/clang/test/Driver/sycl-post-link-options.cpp index c343f4685aaa1..57235a3cf97a4 100644 --- a/clang/test/Driver/sycl-post-link-options.cpp +++ b/clang/test/Driver/sycl-post-link-options.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl -### \ // 
RUN: --no-offload-new-driver -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-spv-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // // Generate .o file as linker wrapper input. // From fce23f67aac33ffd209b38c38a0dbad65252c87a Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 12:28:35 +0800 Subject: [PATCH 16/37] fix sycl-offload-new-driver.c lit failure Signed-off-by: jinge90 --- clang/test/Driver/sycl-offload-new-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index dd656192b80f3..8087c7a987e45 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -60,7 +60,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ // RUN: -Xdevice-post-link -post-link-opt -### %s 2>&1 \ // RUN: | FileCheck -check-prefix WRAPPER_OPTIONS_POSTLINK %s -// WRAPPER_OPTIONS_POSTLINK: clang-linker-wrapper{{.*}} "--sycl-post-link-options=-O2 -device-globals -post-link-opt" +// WRAPPER_OPTIONS_POSTLINK: clang-linker-wrapper{{.*}} "--sycl-post-link-options=-O2 -device-globals --device-lib-spv-dir={{.*}} -post-link-opt" // -fsycl-device-only behavior // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ From df640583119dd10d3730c9a4ae0d4357c9bad435 Mon Sep 17 00:00:00 2001 From: jinge90 
Date: Tue, 26 Nov 2024 15:44:31 +0800 Subject: [PATCH 17/37] don't pass spv dir path in AOT or devicelib is disabled Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/Clang.cpp | 41 +++++++++++++++++++-------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 601d499ffee22..a7f04c5cc35c5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10929,18 +10929,35 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (allowDeviceImageDependencies(TCArgs)) addArgs(PostLinkArgs, TCArgs, {"-allow-device-image-dependencies"}); - SYCLInstallationDetector SYCLInstall(TC.getDriver()); - SmallVector, 4> SpvLocCandidates; - SmallString<128> FallbackAssertName("libsycl-fallback-cassert.spv"); - SYCLInstall.getSYCLDeviceLibPath(SpvLocCandidates, true); - for (const auto &SpvLoc : SpvLocCandidates) { - SmallString<128> FullLibName(SpvLoc); - llvm::sys::path::append(FullLibName, FallbackAssertName); - if (llvm::sys::fs::exists(FullLibName)) { - SmallString<128> SYCLDeviceLibDir("--device-lib-spv-dir="); - SYCLDeviceLibDir += SpvLoc.str(); - addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()}); - break; + bool DeviceLibDisable = false; + Arg *DeviceLibArg = TCArgs.getLastArg(options::OPT_fsycl_device_lib_EQ, + options::OPT_fno_sycl_device_lib_EQ); + if (DeviceLibArg && + DeviceLibArg->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) { + for (StringRef Val : DeviceLibArg->getValues()) { + if (Val == "all") { + DeviceLibDisable = true; + break; + } + } + } + + // Fallback spv is NOT involved in AOT compilation or + // '-fno-sycl-device-lib=all' is applied by user explicitly. 
+ if (!TC.getTriple().isSPIRAOT() && !DeviceLibDisable) { + SYCLInstallationDetector SYCLInstall(TC.getDriver()); + SmallVector, 4> SpvLocCandidates; + SmallString<128> FallbackAssertName("libsycl-fallback-cassert.spv"); + SYCLInstall.getSYCLDeviceLibPath(SpvLocCandidates, true); + for (const auto &SpvLoc : SpvLocCandidates) { + SmallString<128> FullLibName(SpvLoc); + llvm::sys::path::append(FullLibName, FallbackAssertName); + if (llvm::sys::fs::exists(FullLibName)) { + SmallString<128> SYCLDeviceLibDir("--device-lib-spv-dir="); + SYCLDeviceLibDir += SpvLoc.str(); + addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()}); + break; + } } } } From 22757c732e300fc37f778e8049a2ba94c518799b Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 15:58:07 +0800 Subject: [PATCH 18/37] skip embeding spv when empty spv loc is specified Signed-off-by: jinge90 --- llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 7c7bbf505a63f..fb5be1d1b49bf 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -168,7 +168,9 @@ PropSetRegTy computeModuleProperties(const Module &M, const StringRef &DeviceLibSPVLoc) { PropSetRegTy PropSet; - { + // If sycl-post-link doesn't specify a valid fallback spv path, the param + // 'DeviceLibSPVLoc' is set to an empty string. 
+ if (!DeviceLibSPVLoc.empty()) { SmallVector RequiredLibs; llvm::getRequiredSYCLDeviceLibs(M, RequiredLibs); for (auto Ext : RequiredLibs) { From 1902e2ea1c3f062f2461a75cf38a14b5cbe4c6c9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 17:33:36 +0800 Subject: [PATCH 19/37] fix build error Signed-off-by: jinge90 --- sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 910bfef64cd0f..c95d6093de694 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -423,7 +423,7 @@ Expected jit_compiler::performPostLink( /*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true, /*DeviceGlobals=*/false}; PropertySetRegistry Properties = - computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq); + computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq, ""); // TODO: Manually add `compile_target` property as in // `saveModuleProperties`? 
const auto &PropertySets = Properties.getPropSets(); From 353de1ba41fa07e81b9ecf8942cfd8e84c252f29 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 18:10:18 +0800 Subject: [PATCH 20/37] remove lit check for sycl device lib require mask Signed-off-by: jinge90 --- .../device-requirements/mask.ll | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 llvm/test/tools/sycl-post-link/device-requirements/mask.ll diff --git a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll b/llvm/test/tools/sycl-post-link/device-requirements/mask.ll deleted file mode 100644 index 31b393249bf5f..0000000000000 --- a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: sycl-post-link -properties -split=auto < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.prop - -; CHECK:[SYCL/devicelib req mask] -; CHECK: DeviceLibReqMask=1|64 - -source_filename = "main.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spirv64-unknown-unknown" - -declare spir_func i32 @__devicelib_imf_umulhi(i32 noundef %0, i32 noundef %1) - -; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @kernel() #0 { -entry: - %0 = call i32 @__devicelib_imf_umulhi(i32 0, i32 0) - ret void -} - -attributes #0 = { "sycl-module-id"="main.cpp" } From 8bbe9760cd501fdc703606d2626fc1f22aac75fe Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 20:03:11 +0800 Subject: [PATCH 21/37] fix sycl-post-link-options-win lit failure Signed-off-by: jinge90 --- clang/test/Driver/sycl-post-link-options-win.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-post-link-options-win.cpp b/clang/test/Driver/sycl-post-link-options-win.cpp index 7c641cea6783d..16ab31ef37891 100644 --- a/clang/test/Driver/sycl-post-link-options-win.cpp +++ 
b/clang/test/Driver/sycl-post-link-options-win.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx -### --target=x86_64-pc-windows-msvc -fsycl \ // RUN: -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-spv-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // ------- // Generate .o file as linker wrapper input. // From 217daf2d7790fe63134c71050e4e3e1bbf42801f Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 26 Nov 2024 21:17:02 +0800 Subject: [PATCH 22/37] no need to pass spv dir in AOT Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/Clang.cpp | 3 ++- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a7f04c5cc35c5..084af176509ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10944,7 +10944,8 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, // Fallback spv is NOT involved in AOT compilation or // '-fno-sycl-device-lib=all' is applied by user explicitly. 
- if (!TC.getTriple().isSPIRAOT() && !DeviceLibDisable) { + if (TC.getTriple().isSPIROrSPIRV() && !TC.getTriple().isSPIRAOT() && + !DeviceLibDisable) { SYCLInstallationDetector SYCLInstall(TC.getDriver()); SmallVector, 4> SpvLocCandidates; SmallString<128> FallbackAssertName("libsycl-fallback-cassert.spv"); diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index bf07362e02ec2..90a2184801850 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -729,7 +729,7 @@ runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args) { CmdArgs.push_back(*SYCLPostLinkPath); const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ); - if (SYCLDeviceLibLoc) { + if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) { std::string SYCLDeviceLibSPVLoc = SYCLDeviceLibLoc->getValue(); llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); if (HostTriple.isOSWindows()) From 094bcfcfd97855117f4d7563a1d5632bfec3a6bd Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 27 Nov 2024 10:21:49 +0800 Subject: [PATCH 23/37] fix new offload driver lit Signed-off-by: jinge90 --- clang/test/Driver/sycl-offload-new-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 8087c7a987e45..dd656192b80f3 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -60,7 +60,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ // RUN: -Xdevice-post-link -post-link-opt -### %s 2>&1 \ // RUN: | FileCheck -check-prefix WRAPPER_OPTIONS_POSTLINK %s -// WRAPPER_OPTIONS_POSTLINK: clang-linker-wrapper{{.*}} "--sycl-post-link-options=-O2 -device-globals --device-lib-spv-dir={{.*}} 
-post-link-opt" +// WRAPPER_OPTIONS_POSTLINK: clang-linker-wrapper{{.*}} "--sycl-post-link-options=-O2 -device-globals -post-link-opt" // -fsycl-device-only behavior // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ From c310a454a54b42d0459b2d093d1eb6fe596c766b Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 27 Nov 2024 11:12:06 +0800 Subject: [PATCH 24/37] check null ptr returned by aligned_alloc Signed-off-by: jinge90 --- .../lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index fb5be1d1b49bf..89c2fb95f8086 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -193,8 +193,22 @@ PropSetRegTy computeModuleProperties(const Module &M, uint8_t *SPVBuffer = reinterpret_cast( std::aligned_alloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); #endif + + if (!SPVBuffer) + continue; + + // The data embedded consists of 2 parts, first 4 bytes are corresponding + // DeivceLib extension and the following bytes are raw data of fallback + // spv files. There is 1 exception for native bfloat16 spv, it is used + // to support native bfloat16 conversions on some devices and it doesn't + // fully comply to fallback device library mechanism, the extension + // 'cl_intel_devicelib_bfloat16' corresponds to 2 fallback spvs: native + // version used for devices which supports native bfloat16 conversion and + // generic version for all other devices, so we have to embed 1 one field + // to distinguish. 
*(reinterpret_cast(SPVBuffer)) = static_cast(Ext); - std::memcpy(SPVBuffer + sizeof(uint32_t), (*SPVMB)->getBufferStart(), + size_t RawSPVOffset = sizeof(uint32_t); + std::memcpy(SPVBuffer + RawSPVOffset, (*SPVMB)->getBufferStart(), SPVSize); llvm::SYCLDeviceLibSPVBinary SPVBinaryObj(SPVBuffer, SPVSize + sizeof(uint32_t)); From 98a7e5c59c813810c04bd22332ded467f2e63a23 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 27 Nov 2024 14:21:53 +0800 Subject: [PATCH 25/37] support bfloat16 native spv Signed-off-by: jinge90 --- .../llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h | 12 +++- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 37 +++++------ .../SYCLLowerIR/SYCLRequiredDeviceLibs.cpp | 62 ++++++++++++------- .../program_manager/program_manager.cpp | 19 ++++++ .../program_manager/program_manager.hpp | 8 ++- 5 files changed, 89 insertions(+), 49 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h b/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h index ce518e57489c6..2df741fdc9f24 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h @@ -40,6 +40,14 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; +enum class DeviceLibIsNative : std::uint32_t { Yes, No, Ignore }; + +struct SYCLDeviceLibSPVMeta { + DeviceLibExt SPVExt; + const char *SPVFileName; + DeviceLibIsNative IsNative; +}; + struct SYCLDeviceLibSPVBinary { typedef uint8_t value_type; value_type *SPVRawBytes; @@ -53,8 +61,6 @@ struct SYCLDeviceLibSPVBinary { }; void getRequiredSYCLDeviceLibs(const Module &M, - SmallVector &ReqLibs); - -const char *getDeviceLibFileName(DeviceLibExt RequiredDeviceLibExt); + SmallVector &ReqLibs); } // namespace llvm diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 89c2fb95f8086..a33afdf5e236a 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ 
b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -171,12 +171,11 @@ PropSetRegTy computeModuleProperties(const Module &M, // If sycl-post-link doesn't specify a valid fallback spv path, the param // 'DeviceLibSPVLoc' is set to an empty string. if (!DeviceLibSPVLoc.empty()) { - SmallVector RequiredLibs; + SmallVector RequiredLibs; llvm::getRequiredSYCLDeviceLibs(M, RequiredLibs); - for (auto Ext : RequiredLibs) { - const char *SPVFileName = llvm::getDeviceLibFileName(Ext); + for (auto ExtMeta : RequiredLibs) { std::string SPVPath = - DeviceLibSPVLoc.str() + "/" + std::string(SPVFileName); + DeviceLibSPVLoc.str() + "/" + std::string(ExtMeta.SPVFileName); if (!llvm::sys::fs::exists(SPVPath)) continue; @@ -188,31 +187,27 @@ PropSetRegTy computeModuleProperties(const Module &M, // std::aligned_alloc is not available in some pre-ci Windows machine. #if defined(_WIN32) || defined(_WIN64) uint8_t *SPVBuffer = reinterpret_cast( - _aligned_malloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); + _aligned_malloc(alignof(uint32_t), SPVSize + sizeof(uint32_t) * 2)); #else - uint8_t *SPVBuffer = reinterpret_cast( - std::aligned_alloc(alignof(uint32_t), SPVSize + sizeof(uint32_t))); + uint8_t *SPVBuffer = reinterpret_cast(std::aligned_alloc( + alignof(uint32_t), SPVSize + sizeof(uint32_t) * 2)); #endif if (!SPVBuffer) continue; - // The data embedded consists of 2 parts, first 4 bytes are corresponding - // DeivceLib extension and the following bytes are raw data of fallback - // spv files. There is 1 exception for native bfloat16 spv, it is used - // to support native bfloat16 conversions on some devices and it doesn't - // fully comply to fallback device library mechanism, the extension - // 'cl_intel_devicelib_bfloat16' corresponds to 2 fallback spvs: native - // version used for devices which supports native bfloat16 conversion and - // generic version for all other devices, so we have to embed 1 one field - // to distinguish. 
- *(reinterpret_cast(SPVBuffer)) = static_cast(Ext); - size_t RawSPVOffset = sizeof(uint32_t); - std::memcpy(SPVBuffer + RawSPVOffset, (*SPVMB)->getBufferStart(), + // The data embedded consists of 3 parts, overall layout is following: + // |--devicelib ext(4 byte)--|--IsNative Flag(4 byte)--|--spv raw data--| + *(reinterpret_cast(SPVBuffer)) = + static_cast(ExtMeta.SPVExt); + + *(reinterpret_cast(SPVBuffer + sizeof(uint32_t))) = + static_cast(ExtMeta.IsNative); + std::memcpy(SPVBuffer + sizeof(uint32_t) * 2, (*SPVMB)->getBufferStart(), SPVSize); llvm::SYCLDeviceLibSPVBinary SPVBinaryObj(SPVBuffer, - SPVSize + sizeof(uint32_t)); - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, SPVFileName, + SPVSize + sizeof(uint32_t) * 2); + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, ExtMeta.SPVFileName, SPVBinaryObj); #if defined(_WIN32) || defined(_WIN64) _aligned_free(SPVBuffer); diff --git a/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp b/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp index 38960cdc9c239..6b2d912372308 100644 --- a/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp @@ -735,24 +735,43 @@ SYCLDeviceLibFuncMap SDLMap = { } // namespace -// Each fallback device library corresponds to one SPV file whose name is kept -// in DeviceLibSPVExtMap. -static std::unordered_map DeviceLibSPVExtMap = { - {DeviceLibExt::cl_intel_devicelib_assert, "libsycl-fallback-cassert.spv"}, - {DeviceLibExt::cl_intel_devicelib_math, "libsycl-fallback-cmath.spv"}, +// One devicelib extension may correspond to multiple spv files, following +// map stores corresponding index values in SPVMetaList for an extension. 
+static std::unordered_map> + DeviceLibSPVExtMap = {{DeviceLibExt::cl_intel_devicelib_assert, {0}}, + {DeviceLibExt::cl_intel_devicelib_math, {1}}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, {2}}, + {DeviceLibExt::cl_intel_devicelib_complex, {3}}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, {4}}, + {DeviceLibExt::cl_intel_devicelib_cstring, {5}}, + {DeviceLibExt::cl_intel_devicelib_imf, {6}}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, {7}}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, {8}}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, {9, 10}}}; + +static SYCLDeviceLibSPVMeta SPVMetaList[] = { + {DeviceLibExt::cl_intel_devicelib_assert, "libsycl-fallback-cassert.spv", + DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_math, "libsycl-fallback-cmath.spv", + DeviceLibIsNative::Ignore}, {DeviceLibExt::cl_intel_devicelib_math_fp64, - "libsycl-fallback-cmath-fp64.spv"}, - {DeviceLibExt::cl_intel_devicelib_complex, "libsycl-fallback-complex.spv"}, + "libsycl-fallback-cmath-fp64.spv", DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_complex, "libsycl-fallback-complex.spv", + DeviceLibIsNative::Ignore}, {DeviceLibExt::cl_intel_devicelib_complex_fp64, - "libsycl-fallback-complex-fp64.spv"}, - {DeviceLibExt::cl_intel_devicelib_cstring, "libsycl-fallback-cstring.spv"}, - {DeviceLibExt::cl_intel_devicelib_imf, "libsycl-fallback-imf.spv"}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, - "libsycl-fallback-imf-fp64.spv"}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, - "libsycl-fallback-imf-bf16.spv"}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - "libsycl-fallback-bfloat16.spv"}}; + "libsycl-fallback-complex-fp64.spv", DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_cstring, "libsycl-fallback-cstring.spv", + DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_imf, "libsycl-fallback-imf.spv", + DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, "libsycl-fallback-imf-fp64.spv", + 
DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, "libsycl-fallback-imf-bf16.spv", + DeviceLibIsNative::Ignore}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, "libsycl-fallback-bfloat16.spv", + DeviceLibIsNative::No}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, "libsycl-native-bfloat16.spv", + DeviceLibIsNative::Yes}}; namespace llvm { // For each device image module, we go through all functions which meets @@ -760,7 +779,8 @@ namespace llvm { // 2. The function is declaration which means it doesn't have function body // And we don't expect non-spirv functions with "__devicelib_" prefix. void getRequiredSYCLDeviceLibs( - const Module &M, llvm::SmallVector &ReqDeviceLibs) { + const Module &M, + llvm::SmallVector &ReqDeviceLibs) { // Device libraries will be enabled only for spir-v module. if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) return; @@ -776,12 +796,10 @@ void getRequiredSYCLDeviceLibs( continue; DeviceLibUsed.insert(DeviceLibFuncIter->second); - ReqDeviceLibs.push_back(DeviceLibFuncIter->second); + for (size_t idx : DeviceLibSPVExtMap[DeviceLibFuncIter->second]) { + ReqDeviceLibs.push_back(SPVMetaList[idx]); + } } } } - -const char *getDeviceLibFileName(DeviceLibExt RequiredDeviceLibExt) { - return DeviceLibSPVExtMap[RequiredDeviceLibExt]; -} } // namespace llvm diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index be954f2bc2c3d..f51f8f2372b56 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1734,6 +1734,12 @@ getDeviceLibPrograms(const ContextImplPtr Context, !fp64Support) continue; + DeviceLibByteArray.dropBytes(4); + DeviceLibIsNative IsNativeSPV = static_cast( + (static_cast(DeviceLibByteArray[3]) << 24) | + (static_cast(DeviceLibByteArray[2]) << 16) | + (static_cast(DeviceLibByteArray[1]) << 8) | + DeviceLibByteArray[0]); auto DeviceLibExtReqName = 
getDeviceLibExtensionStr(DeviceLibExtReq); bool InhibitNativeImpl = false; if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { @@ -1746,11 +1752,24 @@ getDeviceLibPrograms(const ContextImplPtr Context, // 1. underlying device doesn't support the extension // 2. user explicitly ask to inhibit usage of native support if (!ExtReqAvailable || InhibitNativeImpl) { + if (IsNativeSPV == DeviceLibIsNative::Yes) + continue; DeviceLibByteArray.dropBytes(4); Programs.push_back(loadDeviceLibFallback( Context, DeviceLibExtReq, Devices, /*UseNativeLib=*/false, false, DeviceLibByteArray.begin(), DeviceLibByteArray.size())); + } else { + // bfloat16 spv has native and generic version, if native support is + // available in underlying device, we should use native version and + // ignore generic version. + if (IsNativeSPV != DeviceLibIsNative::Yes) + continue; + DeviceLibByteArray.dropBytes(4); + Programs.push_back(loadDeviceLibFallback( + Context, DeviceLibExtReq, Devices, + /*UseNativeLib=*/true, false, DeviceLibByteArray.begin(), + DeviceLibByteArray.size())); } } } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 313ea40cc9dc4..cbcf997a67d8a 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -71,9 +71,9 @@ class device_impl; using DeviceImplPtr = std::shared_ptr; class queue_impl; class event_impl; -// DeviceLibExt is shared between sycl runtime and sycl-post-link tool. -// If any update is made here, need to sync with DeviceLibExt definition -// in llvm/tools/sycl-post-link/sycl-post-link.cpp +// DeviceLibExt and DeviceLibIsNaitve are shared between sycl runtime and +// SYCL Post Link tool. 
If any update is made here, please sync with definition +// in llvm/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_assert, cl_intel_devicelib_math, @@ -87,6 +87,8 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; +enum class DeviceLibIsNative : std::uint32_t {Yes, No, Ignore}; + // Provides single loading and building OpenCL programs with unique contexts // that is necessary for no interoperability cases with lambda. class ProgramManager { From 0035e745eeed6002b45edebaacaf22e45c904cf7 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 27 Nov 2024 14:37:12 +0800 Subject: [PATCH 26/37] fix clang format Signed-off-by: jinge90 --- sycl/source/detail/program_manager/program_manager.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index cbcf997a67d8a..a707ca5d4c5c7 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -87,7 +87,7 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; -enum class DeviceLibIsNative : std::uint32_t {Yes, No, Ignore}; +enum class DeviceLibIsNative : std::uint32_t { Yes, No, Ignore }; // Provides single loading and building OpenCL programs with unique contexts // that is necessary for no interoperability cases with lambda. 
From 230c8d9cf8e6b63756846ece0c069f3c3cf3e411 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 27 Nov 2024 15:59:56 +0800 Subject: [PATCH 27/37] fix incorrect usage of _aligned_malloc Signed-off-by: jinge90 --- llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index a33afdf5e236a..b3d8115b04192 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -187,7 +187,7 @@ PropSetRegTy computeModuleProperties(const Module &M, // std::aligned_alloc is not available in some pre-ci Windows machine. #if defined(_WIN32) || defined(_WIN64) uint8_t *SPVBuffer = reinterpret_cast( - _aligned_malloc(alignof(uint32_t), SPVSize + sizeof(uint32_t) * 2)); + _aligned_malloc(SPVSize + sizeof(uint32_t) * 2, alignof(uint32_t))); #else uint8_t *SPVBuffer = reinterpret_cast(std::aligned_alloc( alignof(uint32_t), SPVSize + sizeof(uint32_t) * 2)); From 3543b2d04bca6582b91cdea80ceef38ac7d6ac40 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 2 Dec 2024 10:26:26 +0800 Subject: [PATCH 28/37] remove sycl devicelib required binary property usage Signed-off-by: jinge90 --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.h | 3 +- ...redDeviceLibs.h => SYCLDeviceLibReqMask.h} | 27 +- llvm/include/llvm/Support/PropertySetIO.h | 2 +- llvm/lib/SYCLLowerIR/CMakeLists.txt | 2 +- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 62 +---- ...eviceLibs.cpp => SYCLDeviceLibReqMask.cpp} | 90 +++--- llvm/lib/Support/PropertySetIO.cpp | 2 +- .../device-requirements/mask.ll | 20 ++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 11 +- .../lib/rtc/DeviceCompilation.cpp | 2 +- sycl/source/detail/compiler.hpp | 7 +- sycl/source/detail/device_binary_image.cpp | 3 - sycl/source/detail/device_binary_image.hpp | 4 - .../program_manager/program_manager.cpp | 261 +++--------------- 
.../program_manager/program_manager.hpp | 12 +- 15 files changed, 109 insertions(+), 399 deletions(-) rename llvm/include/llvm/SYCLLowerIR/{SYCLRequiredDeviceLibs.h => SYCLDeviceLibReqMask.h} (64%) rename llvm/lib/SYCLLowerIR/{SYCLRequiredDeviceLibs.cpp => SYCLDeviceLibReqMask.cpp} (93%) create mode 100644 llvm/test/tools/sycl-post-link/device-requirements/mask.ll diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index a1402f74faab9..e7cff6c730051 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -34,8 +34,7 @@ using EntryPointSet = SetVector; PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps, - const StringRef &DeviceLibSPVLoc); + const GlobalBinImageProps &GlobProps); std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h similarity index 64% rename from llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h rename to llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h index 2df741fdc9f24..c9b737e2d053a 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h @@ -1,4 +1,4 @@ -//===----- SYCLRequiredDeviceLibs.h - get SYCL devicelib required Info ----===// +//===----- SYCLDeviceLibReqMask.h - get SYCL devicelib required Info -----=-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -16,8 +16,6 @@ #pragma once -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -40,27 +38,6 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; -enum class DeviceLibIsNative : std::uint32_t { Yes, No, Ignore }; - -struct SYCLDeviceLibSPVMeta { - DeviceLibExt SPVExt; - const char *SPVFileName; - DeviceLibIsNative IsNative; -}; - -struct SYCLDeviceLibSPVBinary { - typedef uint8_t value_type; - value_type *SPVRawBytes; - size_t SPVBytesNum; - SYCLDeviceLibSPVBinary(value_type *RawB, size_t BNum) { - SPVRawBytes = RawB; - SPVBytesNum = BNum; - } - value_type *data() const { return SPVRawBytes; } - size_t size() const { return SPVBytesNum; } -}; - -void getRequiredSYCLDeviceLibs(const Module &M, - SmallVector &ReqLibs); +uint32_t getSYCLDeviceLibReqMask(const Module &M); } // namespace llvm diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 86f91483a0c5d..13cb687f3b08b 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -199,7 +199,7 @@ class PropertySetRegistry { "SYCL/specialization constants"; static constexpr char SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[] = "SYCL/specialization constants default values"; - static constexpr char SYCL_DEVICELIB_REQ_BINS[] = "SYCL/devicelib req bins"; + static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties"; diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 3977a8fcc6b68..fc254a4809bb5 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -61,7 +61,7 @@ add_llvm_component_library(LLVMSYCLLowerIR SYCLAddOptLevelAttribute.cpp 
SYCLConditionalCallOnDevice.cpp SYCLCreateNVVMAnnotations.cpp - SYCLRequiredDeviceLibs.cpp + SYCLDeviceLibReqMask.cpp SYCLDeviceRequirements.cpp SYCLKernelParamOptInfo.cpp SYCLJointMatrixTransform.cpp diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 7e2aa0ebdb8d1..cfea28538017c 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -17,15 +17,12 @@ #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" -#include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SpecConstants.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" #include #include - #ifndef NDEBUG constexpr int DebugModuleProps = 0; #endif @@ -155,59 +152,14 @@ std::optional getKernelSingleEltMetadata(const Function &Func, PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps, - const StringRef &DeviceLibSPVLoc) { + const GlobalBinImageProps &GlobProps) { PropSetRegTy PropSet; - // If sycl-post-link doesn't specify a valid fallback spv path, the param - // 'DeviceLibSPVLoc' is set to an empty string. - if (!DeviceLibSPVLoc.empty()) { - SmallVector RequiredLibs; - llvm::getRequiredSYCLDeviceLibs(M, RequiredLibs); - for (auto ExtMeta : RequiredLibs) { - std::string SPVPath = - DeviceLibSPVLoc.str() + "/" + std::string(ExtMeta.SPVFileName); - if (!llvm::sys::fs::exists(SPVPath)) - continue; - - auto SPVMB = llvm::MemoryBuffer::getFile(SPVPath); - if (!SPVMB) - continue; - - size_t SPVSize = (*SPVMB)->getBufferSize(); - // std::aligned_alloc is not available in some pre-ci Windows machine. 
-#if defined(_WIN32) || defined(_WIN64) - uint8_t *SPVBuffer = reinterpret_cast( - _aligned_malloc(SPVSize + sizeof(uint32_t) * 2, alignof(uint32_t))); -#else - uint8_t *SPVBuffer = reinterpret_cast(std::aligned_alloc( - alignof(uint32_t), SPVSize + sizeof(uint32_t) * 2)); -#endif - - if (!SPVBuffer) - continue; - - // The data embedded consists of 3 parts, overall layout is following: - // |--devicelib ext(4 byte)--|--IsNative Flag(4 byte)--|--spv raw data--| - *(reinterpret_cast(SPVBuffer)) = - static_cast(ExtMeta.SPVExt); - - *(reinterpret_cast(SPVBuffer + sizeof(uint32_t))) = - static_cast(ExtMeta.IsNative); - std::memcpy(SPVBuffer + sizeof(uint32_t) * 2, (*SPVMB)->getBufferStart(), - SPVSize); - llvm::SYCLDeviceLibSPVBinary SPVBinaryObj(SPVBuffer, - SPVSize + sizeof(uint32_t) * 2); - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_BINS, ExtMeta.SPVFileName, - SPVBinaryObj); -#if defined(_WIN32) || defined(_WIN64) - _aligned_free(SPVBuffer); -#else - std::free(SPVBuffer); -#endif - } + { + uint32_t MRMask = getSYCLDeviceLibReqMask(M); + std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); } - { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, computeDeviceRequirements(M, EntryPoints).asMap()); @@ -501,7 +453,7 @@ PropSetRegTy computeModuleProperties(const Module &M, } PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, - "uses-virtual-functions-set", AllSets); + "uses-virtual-functions-set", AllSets); } } diff --git a/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp similarity index 93% rename from llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp rename to llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp index 6b2d912372308..12914d3763521 100644 --- a/llvm/lib/SYCLLowerIR/SYCLRequiredDeviceLibs.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp @@ -1,4 +1,4 @@ -//==---- SYCLRequiredDeviceLibs.cpp - get SYCL devicelib required Info -----==// +//==----- 
SYCLDeviceLibReqMask.cpp - get SYCL devicelib required Info ------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,8 +14,7 @@ // SYCL runtime later. //===----------------------------------------------------------------------===// -#include "llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/IR/Module.h" #include "llvm/TargetParser/Triple.h" @@ -733,73 +732,46 @@ SYCLDeviceLibFuncMap SDLMap = { DeviceLibExt::cl_intel_devicelib_bfloat16}, }; -} // namespace - -// One devicelib extension may correspond to multiple spv files, following -// map stores corresponding index values in SPVMetaList for an extension. -static std::unordered_map> - DeviceLibSPVExtMap = {{DeviceLibExt::cl_intel_devicelib_assert, {0}}, - {DeviceLibExt::cl_intel_devicelib_math, {1}}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, {2}}, - {DeviceLibExt::cl_intel_devicelib_complex, {3}}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, {4}}, - {DeviceLibExt::cl_intel_devicelib_cstring, {5}}, - {DeviceLibExt::cl_intel_devicelib_imf, {6}}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, {7}}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, {8}}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, {9, 10}}}; +// Each fallback device library corresponds to one bit in "require mask" which +// is an unsigned int32. getDeviceLibBit checks which fallback device library +// is required for FuncName and returns the corresponding bit. 
The corresponding +// mask for each fallback device library is: +// cl_intel_devicelib_assert: 0x1 +// cl_intel_devicelib_math: 0x2 +// cl_intel_devicelib_math_fp64: 0x4 +// cl_intel_devicelib_complex: 0x8 +// cl_intel_devicelib_complex_fp64: 0x10 +// cl_intel_devicelib_cstring : 0x20 +// cl_intel_devicelib_imf: 0x40 +// cl_intel_devicelib_imf_fp64: 0x80 +// cl_intel_devicelib_imf_bf16: 0x100 +// cl_intel_devicelib_bfloat16: 0x200 +uint32_t getDeviceLibBits(const std::string &FuncName) { + auto DeviceLibFuncIter = SDLMap.find(FuncName); + return ((DeviceLibFuncIter == SDLMap.end()) + ? 0 + : 0x1 << (static_cast(DeviceLibFuncIter->second) - + static_cast( + DeviceLibExt::cl_intel_devicelib_assert))); +} -static SYCLDeviceLibSPVMeta SPVMetaList[] = { - {DeviceLibExt::cl_intel_devicelib_assert, "libsycl-fallback-cassert.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_math, "libsycl-fallback-cmath.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - "libsycl-fallback-cmath-fp64.spv", DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_complex, "libsycl-fallback-complex.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - "libsycl-fallback-complex-fp64.spv", DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_cstring, "libsycl-fallback-cstring.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_imf, "libsycl-fallback-imf.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, "libsycl-fallback-imf-fp64.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, "libsycl-fallback-imf-bf16.spv", - DeviceLibIsNative::Ignore}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, "libsycl-fallback-bfloat16.spv", - DeviceLibIsNative::No}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, "libsycl-native-bfloat16.spv", - DeviceLibIsNative::Yes}}; +} // namespace -namespace llvm { // For each device image 
module, we go through all functions which meets // 1. The function name has prefix "__devicelib_" // 2. The function is declaration which means it doesn't have function body // And we don't expect non-spirv functions with "__devicelib_" prefix. -void getRequiredSYCLDeviceLibs( - const Module &M, - llvm::SmallVector &ReqDeviceLibs) { +uint32_t llvm::getSYCLDeviceLibReqMask(const Module &M) { // Device libraries will be enabled only for spir-v module. if (!Triple(M.getTargetTriple()).isSPIROrSPIRV()) - return; - - SmallSet DeviceLibUsed; + return 0; + uint32_t ReqMask = 0; for (const Function &SF : M) { if (SF.getName().starts_with(DEVICELIB_FUNC_PREFIX) && SF.isDeclaration()) { assert(SF.getCallingConv() == CallingConv::SPIR_FUNC); - auto DeviceLibFuncIter = SDLMap.find(SF.getName().str()); - if (DeviceLibFuncIter == SDLMap.end()) - continue; - if (DeviceLibUsed.contains(DeviceLibFuncIter->second)) - continue; - - DeviceLibUsed.insert(DeviceLibFuncIter->second); - for (size_t idx : DeviceLibSPVExtMap[DeviceLibFuncIter->second]) { - ReqDeviceLibs.push_back(SPVMetaList[idx]); - } + uint32_t DeviceLibBits = getDeviceLibBits(SF.getName().str()); + ReqMask |= DeviceLibBits; } } + return ReqMask; } -} // namespace llvm diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index edc46dcbb0c13..2fe7cac00fb14 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -195,7 +195,7 @@ void PropertyValue::copy(const PropertyValue &P) { } constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; -constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_BINS[]; +constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; diff --git a/llvm/test/tools/sycl-post-link/device-requirements/mask.ll 
b/llvm/test/tools/sycl-post-link/device-requirements/mask.ll new file mode 100644 index 0000000000000..31b393249bf5f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-requirements/mask.ll @@ -0,0 +1,20 @@ +; RUN: sycl-post-link -properties -split=auto < %s -o %t.files.table +; RUN: FileCheck %s -input-file=%t.files_0.prop + +; CHECK:[SYCL/devicelib req mask] +; CHECK: DeviceLibReqMask=1|64 + +source_filename = "main.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spirv64-unknown-unknown" + +declare spir_func i32 @__devicelib_imf_umulhi(i32 noundef %0, i32 noundef %1) + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define weak_odr dso_local spir_kernel void @kernel() #0 { +entry: + %0 = call i32 @__devicelib_imf_umulhi(i32 0, i32 0) + ret void +} + +attributes #0 = { "sycl-module-id"="main.cpp" } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index b4ff9d63374b5..e84faf464c42e 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -104,11 +104,6 @@ cl::opt OutputDir{ "Directory where files listed in the result file table will be output"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; -cl::opt DeviceLibSPVDir{ - "device-lib-spv-dir", - cl::desc("Directory where sycl fallback spirv device libraries reside"), - cl::value_desc("dirname"), cl::cat(PostLinkCat)}; - struct TargetFilenamePair { std::string Target; std::string Filename; @@ -312,12 +307,8 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) { std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, StringRef Suff, StringRef Target = "") { - - StringRef SPVDir = ""; - if (DeviceLibSPVDir.getNumOccurrences() > 0) - SPVDir = DeviceLibSPVDir; auto PropSet = - computeModuleProperties(MD.getModule(), MD.entries(), 
GlobProps, SPVDir); + computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); std::string NewSuff = Suff.str(); if (!Target.empty()) { diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 35a23c2c61a22..1d75136160e99 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -425,7 +425,7 @@ Expected jit_compiler::performPostLink( /*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true, /*DeviceGlobals=*/false}; PropertySetRegistry Properties = - computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq, ""); + computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq); // TODO: Manually add `compile_target` property as in // `saveModuleProperties`? const auto &PropertySets = Properties.getPropSets(); diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 42e0981ccc109..35f313ceec3f5 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -44,13 +44,8 @@ /// PropertySetIO.h #define __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP \ "SYCL/specialization constants default values" -/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK is defined in legacy version -/// compiler, we keep this property here and to be compatible with program -/// built with these legacy version compiler. -/// TODO: clear this deprecated property when users upgrade to latest compiler. 
+/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" -/// PropertySetRegistry::SYCL_DEVICELIB_REQ_BINS defined in PropertySetIO.h -#define __SYCL_PROPERTY_SET_DEVICELIB_REQ_BINS "SYCL/devicelib req bins" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" /// PropertySetRegistry::SYCL_KERNEL_PROGRAM_METADATA defined in PropertySetIO.h diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 65a2dedf2d0e8..2be48d4a38fce 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -179,10 +179,7 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); - // TODO: clear deprecated DeviceLibReqMask when developers upgrade to latest - // compiler. DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); - DeviceLibReqBins.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_BINS); KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index d6eb4db7c83c2..203427b89ca45 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -213,10 +213,7 @@ class RTDeviceBinaryImage { const PropertyRange &getSpecConstantsDefaultValues() const { return SpecConstDefaultValuesMap; } - // TODO: clear deprecated DeviceLibReqMask when developers upgrade to latest - // version compiler. 
const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } - const PropertyRange &getDeviceLibReqBins() const { return DeviceLibReqBins; } const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } @@ -249,7 +246,6 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; - RTDeviceBinaryImage::PropertyRange DeviceLibReqBins; RTDeviceBinaryImage::PropertyRange KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange AssertUsed; RTDeviceBinaryImage::PropertyRange ProgramMetadata; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index f51f8f2372b56..58b6aa05afed6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -753,8 +752,7 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } -static inline void -CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) @@ -851,16 +849,18 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( programReleaseInfo.getFuncPtrFromModule(ur::getURLoaderLibrary()); ProgramPtr ProgramManaged(NativePrg, programRelease); - std::vector ImagesVec; // Link a fallback implementation of device libraries if they are not // supported by a device compiler. // Pre-compiled programs (after AOT compilation or read from persitent // cache) are supposed to be already linked. + // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means + // no fallback device library will be linked. 
+ uint32_t DeviceLibReqMask = 0; bool UseDeviceLibs = !DeviceCodeWasInCache && Img.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && !SYCLConfig::get(); if (UseDeviceLibs) - ImagesVec.push_back(&Img); + DeviceLibReqMask = getDeviceLibReqMask(Img); std::vector ProgramsToLink; // If we had a program in cache, then it should have been the fully linked @@ -868,7 +868,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( if (!DeviceCodeWasInCache) { for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) { if (UseDeviceLibs) - ImagesVec.push_back(BinImg); + DeviceLibReqMask |= getDeviceLibReqMask(*BinImg); ur_program_handle_t NativePrg = createURProgram(*BinImg, Context, Devs); @@ -885,7 +885,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( ProgramPtr BuiltProgram = build( std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, - URDevices, ImagesVec, ProgramsToLink, + URDevices, DeviceLibReqMask, ProgramsToLink, /*CreatedFromBinary*/ Img.getFormat() != SYCL_DEVICE_BINARY_TYPE_SPIRV); // Those extra programs won't be used anymore, just the final linked result @@ -1152,8 +1152,8 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, // TODO device libraries may use scpecialization constants, manifest files, etc. // To support that they need to be delivered in a different container - so that // sycl_device_binary_struct can be created for each of them. 
-static bool loadDeviceLibLegacy(const ContextImplPtr Context, const char *Name, - ur_program_handle_t &Prog) { +static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, + ur_program_handle_t &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, std::ifstream::in | std::ifstream::binary); @@ -1173,13 +1173,6 @@ static bool loadDeviceLibLegacy(const ContextImplPtr Context, const char *Name, return Prog != nullptr; } -static bool loadDeviceLib(const ContextImplPtr Context, - ur_program_handle_t &Prog, - const unsigned char *SPVBuffer, size_t SPVSize) { - Prog = createSpirvProgram(Context, SPVBuffer, SPVSize); - return Prog != nullptr; -} - // For each extension, a pair of library names. The first uses native support, // the second emulates functionality in software. static const std::map> @@ -1260,13 +1253,9 @@ static ur_result_t doCompile(const AdapterPtr &Adapter, static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, std::vector &Devices, - bool UseNativeLib, bool LegacyMode = true, - const unsigned char *SPVBuffer = nullptr, - size_t SPVSize = 0) { + bool UseNativeLib) { - const char *LibFileName = nullptr; - if (LegacyMode) - LibFileName = getDeviceLibFilename(Extension, UseNativeLib); + auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); auto LockedCache = Context->acquireCachedLibPrograms(); auto &CachedLibPrograms = LockedCache.get(); // Collect list of devices to compile the library for. Library was already @@ -1303,20 +1292,10 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, bool IsProgramCreated = !URProgram; // Create UR program for device lib if we don't have it yet. 
- if (LegacyMode) { - if (!URProgram && !loadDeviceLibLegacy(Context, LibFileName, URProgram)) { - EraseProgramForDevices(); - throw exception(make_error_code(errc::build), - std::string("Failed to load ") + LibFileName); - } - } else { - if (!URProgram && !loadDeviceLib(Context, URProgram, SPVBuffer, SPVSize)) { - EraseProgramForDevices(); - const char *ExtStr = getDeviceLibExtensionStr(Extension); - throw exception( - make_error_code(errc::build), - std::string("Failed to load fallback device library for ") + ExtStr); - } + if (!URProgram && !loadDeviceLib(Context, LibFileName, URProgram)) { + EraseProgramForDevices(); + throw exception(make_error_code(errc::build), + std::string("Failed to load ") + LibFileName); } // Insert URProgram into the cache for all devices that we compiled it for. @@ -1574,12 +1553,10 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { return ((DeviceLibReqMask & Mask) == Mask); } -// TODO: Clear legacy getDeviceLibPrograms when developers upgrade to -// latest version compiler. static std::vector -getDeviceLibProgramsLegacy(const ContextImplPtr Context, - std::vector &Devices, - uint32_t DeviceLibReqMask) { +getDeviceLibPrograms(const ContextImplPtr Context, + std::vector &Devices, + uint32_t DeviceLibReqMask) { std::vector Programs; std::pair RequiredDeviceLibExt[] = { @@ -1662,168 +1639,6 @@ getDeviceLibProgramsLegacy(const ContextImplPtr Context, return Programs; } -static std::vector -getDeviceLibPrograms(const ContextImplPtr Context, - std::vector &Devices, - const std::vector &Images) { - std::vector Programs; - std::map DeviceLibExtLoaded = { - {DeviceLibExt::cl_intel_devicelib_assert, - /* is fallback loaded? 
*/ false}, - {DeviceLibExt::cl_intel_devicelib_math, false}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_complex, false}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_cstring, false}, - {DeviceLibExt::cl_intel_devicelib_imf, false}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, false}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, false}}; - - // Check whether a specified extension is supported by ALL devices. - auto checkExtForDevices = [&Context, &Devices](const char *ExtStr) -> bool { - bool ExtAvailable = true; - for (auto SingleDevice : Devices) { - std::string DevExtList = - Context->getPlatformImpl() - ->getDeviceImpl(SingleDevice) - ->get_device_info_string( - UrInfoCode::value); - if (DevExtList.npos == DevExtList.find(ExtStr)) { - ExtAvailable = false; - break; - } - } - return ExtAvailable; - }; - - const bool fp64Support = checkExtForDevices("cl_khr_fp64"); - - for (auto Img : Images) { - if (!Img) - continue; - const RTDeviceBinaryImage::PropertyRange &NewRange = - Img->getDeviceLibReqBins(); - if (!NewRange.isAvailable()) - continue; - - for (const auto &DeviceLibBinProp : NewRange) { - auto DeviceLibByteArray = - DeviceBinaryProperty(DeviceLibBinProp).asByteArray(); - DeviceLibByteArray.dropBytes(8); - DeviceLibExt DeviceLibExtReq = static_cast( - (static_cast(DeviceLibByteArray[3]) << 24) | - (static_cast(DeviceLibByteArray[2]) << 16) | - (static_cast(DeviceLibByteArray[1]) << 8) | - DeviceLibByteArray[0]); - if (DeviceLibExtLoaded.count(DeviceLibExtReq) != 1) { - if constexpr (DbgProgMgr > 0) { - std::cerr << "Unknown DeviceLib extension(" - << static_cast(DeviceLibExtReq) << ")!" 
- << std::endl; - } - continue; - } - - if (DeviceLibExtLoaded[DeviceLibExtReq]) - continue; - - if ((DeviceLibExtReq == DeviceLibExt::cl_intel_devicelib_math_fp64 || - DeviceLibExtReq == DeviceLibExt::cl_intel_devicelib_complex_fp64 || - DeviceLibExtReq == DeviceLibExt::cl_intel_devicelib_imf_fp64) && - !fp64Support) - continue; - - DeviceLibByteArray.dropBytes(4); - DeviceLibIsNative IsNativeSPV = static_cast( - (static_cast(DeviceLibByteArray[3]) << 24) | - (static_cast(DeviceLibByteArray[2]) << 16) | - (static_cast(DeviceLibByteArray[1]) << 8) | - DeviceLibByteArray[0]); - auto DeviceLibExtReqName = getDeviceLibExtensionStr(DeviceLibExtReq); - bool InhibitNativeImpl = false; - if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, DeviceLibExtReqName) != nullptr; - } - - bool ExtReqAvailable = checkExtForDevices(DeviceLibExtReqName); - - // Load fallback device library only when 1) or 2) is met: - // 1. underlying device doesn't support the extension - // 2. user explicitly ask to inhibit usage of native support - if (!ExtReqAvailable || InhibitNativeImpl) { - if (IsNativeSPV == DeviceLibIsNative::Yes) - continue; - DeviceLibByteArray.dropBytes(4); - Programs.push_back(loadDeviceLibFallback( - Context, DeviceLibExtReq, Devices, - /*UseNativeLib=*/false, false, DeviceLibByteArray.begin(), - DeviceLibByteArray.size())); - } else { - // bfloat16 spv has native and generic version, if native support is - // available in underlying device, we should use native version and - // ignore generic version. 
- if (IsNativeSPV != DeviceLibIsNative::Yes) - continue; - DeviceLibByteArray.dropBytes(4); - Programs.push_back(loadDeviceLibFallback( - Context, DeviceLibExtReq, Devices, - /*UseNativeLib=*/true, false, DeviceLibByteArray.begin(), - DeviceLibByteArray.size())); - } - } - } - return Programs; -} - -static void -checkDeviceLibsLinkMode(const std::vector &Images, - bool &LinkDeviceLib, bool &LegacyLinkMode) { - bool ReqMaskAvailable = false, ReqBinsAvailable = false; - for (auto Img : Images) { - if (!Img) - continue; - const RTDeviceBinaryImage::PropertyRange &LegacyRange = - Img->getDeviceLibReqMask(); - if (LegacyRange.isAvailable()) { - ReqMaskAvailable = true; - continue; - } - - const RTDeviceBinaryImage::PropertyRange &NewRange = - Img->getDeviceLibReqBins(); - if (NewRange.isAvailable()) - ReqBinsAvailable = true; - } - - // If both ReqBins and ReqMask are available, it means user's device image - // and the images in cache are built with different version compiler, we - // don't support such scenario. - if ((!ReqMaskAvailable && !ReqBinsAvailable) || - (ReqMaskAvailable && ReqBinsAvailable)) { - LinkDeviceLib = false; - return; - } - - LinkDeviceLib = true; - LegacyLinkMode = ReqMaskAvailable; -} - -static uint32_t getDeviceLibReqMaskFromImages( - const std::vector &Images) { - uint32_t DeviceLibReqMask = 0; - for (auto Img : Images) { - if (!Img) - continue; - const RTDeviceBinaryImage::PropertyRange &ReqMaskRange = - Img->getDeviceLibReqMask(); - if (ReqMaskRange.isAvailable()) - DeviceLibReqMask |= - DeviceBinaryProperty(*(ReqMaskRange.begin())).asUint32(); - } - return DeviceLibReqMask; -} - // Check if device image is compressed. 
static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { @@ -1834,25 +1649,19 @@ static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { ProgramManager::ProgramPtr ProgramManager::build( ProgramPtr Program, const ContextImplPtr Context, const std::string &CompileOptions, const std::string &LinkOptions, - std::vector &Devices, - const std::vector &Images, + std::vector &Devices, uint32_t DeviceLibReqMask, const std::vector &ExtraProgramsToLink, bool CreatedFromBinary) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " << CompileOptions << ", " << LinkOptions << ", " - << VecToString(Devices) << ", " << std::dec << ", " - << VecToString(ExtraProgramsToLink) << ", " << CreatedFromBinary - << ")\n"; + << VecToString(Devices) << ", " << std::hex << DeviceLibReqMask + << std::dec << ", " << VecToString(ExtraProgramsToLink) << ", " + << CreatedFromBinary << ")\n"; } - bool LinkDeviceLibs = false; - bool LegacyDeviceLibLinkMode = false; - if (Images.size() == 0) - LinkDeviceLibs = false; - else - checkDeviceLibsLinkMode(Images, LinkDeviceLibs, LegacyDeviceLibLinkMode); + bool LinkDeviceLibs = (DeviceLibReqMask != 0); // TODO: this is a temporary workaround for GPU tests for ESIMD compiler. 
// We do not link with other device libraries, because it may fail @@ -1863,13 +1672,7 @@ ProgramManager::ProgramPtr ProgramManager::build( std::vector LinkPrograms; if (LinkDeviceLibs) { - if (LegacyDeviceLibLinkMode) { - uint32_t DeviceLibReqMask = getDeviceLibReqMaskFromImages(Images); - LinkPrograms = - getDeviceLibProgramsLegacy(Context, Devices, DeviceLibReqMask); - } else { - LinkPrograms = getDeviceLibPrograms(Context, Devices, Images); - } + LinkPrograms = getDeviceLibPrograms(Context, Devices, DeviceLibReqMask); } static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); @@ -2185,6 +1988,15 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, F.close(); } +uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { + const RTDeviceBinaryImage::PropertyRange &DLMRange = + Img.getDeviceLibReqMask(); + if (DLMRange.isAvailable()) + return DeviceBinaryProperty(*(DLMRange.begin())).asUint32(); + else + return 0x0; +} + const KernelArgMask * ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, const std::string &KernelName) { @@ -2973,12 +2785,11 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( // No linking of extra programs reqruired. std::vector ExtraProgramsToLink; std::vector Devs = {DeviceImpl->getHandleRef()}; - std::vector ImagesVec; - // For non-spirv target, we don't need to link any fallback device library. - // An empty images vector will skip linking fallback device libraries. 
auto BuildProgram = build(std::move(ProgramManaged), detail::getSyclObjImpl(Context), - CompileOpts, LinkOpts, Devs, ImagesVec, ExtraProgramsToLink); + CompileOpts, LinkOpts, Devs, + /*For non SPIR-V devices DeviceLibReqMask is always 0*/ 0, + ExtraProgramsToLink); ur_kernel_handle_t UrKernel{nullptr}; Adapter->call( BuildProgram.get(), KernelName.c_str(), &UrKernel); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index a707ca5d4c5c7..75689aadfb290 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -71,9 +71,9 @@ class device_impl; using DeviceImplPtr = std::shared_ptr; class queue_impl; class event_impl; -// DeviceLibExt and DeviceLibIsNaitve are shared between sycl runtime and -// SYCL Post Link tool. If any update is made here, please sync with definition -// in llvm/llvm/include/llvm/SYCLLowerIR/SYCLRequiredDeviceLibs.h +// DeviceLibExt is shared between sycl runtime and sycl-post-link tool. +// If any update is made here, need to sync with DeviceLibExt definition +// in llvm/tools/sycl-post-link/sycl-post-link.cpp enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_assert, cl_intel_devicelib_math, @@ -87,8 +87,6 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; -enum class DeviceLibIsNative : std::uint32_t { Yes, No, Ignore }; - // Provides single loading and building OpenCL programs with unique contexts // that is necessary for no interoperability cases with lambda. class ProgramManager { @@ -193,6 +191,8 @@ class ProgramManager { static std::string getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context); + uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); + /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program.
/// \param NativePrg the UR program associated with the kernel. @@ -321,7 +321,7 @@ class ProgramManager { const std::string &CompileOptions, const std::string &LinkOptions, std::vector &Devices, - const std::vector &Images, + uint32_t DeviceLibReqMask, const std::vector &ProgramsToLink, bool CreatedFromBinary = false); From 2e6387c18a6896ba1d8cf86643ec4742b255fa26 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 2 Dec 2024 10:51:13 +0800 Subject: [PATCH 29/37] rename device-lib-spv-dir to device-lib-dir Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/Clang.cpp | 14 +++++++------- clang/lib/Driver/ToolChains/SYCL.cpp | 13 +++---------- clang/lib/Driver/ToolChains/SYCL.h | 3 +-- clang/test/Driver/sycl-linker-wrapper-image.cpp | 15 +++++++++------ clang/test/Driver/sycl-post-link-options-win.cpp | 2 +- clang/test/Driver/sycl-post-link-options.cpp | 2 +- .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 16 ++++++---------- llvm/tools/sycl-post-link/sycl-post-link.cpp | 5 +++++ 8 files changed, 33 insertions(+), 37 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 084af176509ff..984b0508e64a8 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10947,15 +10947,15 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (TC.getTriple().isSPIROrSPIRV() && !TC.getTriple().isSPIRAOT() && !DeviceLibDisable) { SYCLInstallationDetector SYCLInstall(TC.getDriver()); - SmallVector, 4> SpvLocCandidates; - SmallString<128> FallbackAssertName("libsycl-fallback-cassert.spv"); - SYCLInstall.getSYCLDeviceLibPath(SpvLocCandidates, true); - for (const auto &SpvLoc : SpvLocCandidates) { - SmallString<128> FullLibName(SpvLoc); + SmallVector, 4> DeviceLibLocCandidates; + SmallString<128> FallbackAssertName("libsycl-fallback-cassert.bc"); + SYCLInstall.getSYCLDeviceLibPath(DeviceLibLocCandidates); + for (const auto &DeviceLibLoc : DeviceLibLocCandidates) { + 
SmallString<128> FullLibName(DeviceLibLoc); llvm::sys::path::append(FullLibName, FallbackAssertName); if (llvm::sys::fs::exists(FullLibName)) { - SmallString<128> SYCLDeviceLibDir("--device-lib-spv-dir="); - SYCLDeviceLibDir += SpvLoc.str(); + SmallString<128> SYCLDeviceLibDir("--device-lib-dir="); + SYCLDeviceLibDir += DeviceLibLoc.str(); addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()}); break; } diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 78c988da23801..4fa2bcef89bdb 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include "SYCL.h" #include "CommonArgs.h" #include "clang/Driver/Action.h" @@ -153,20 +152,14 @@ SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D) } void SYCLInstallationDetector::getSYCLDeviceLibPath( - llvm::SmallVector, 4> &DeviceLibPaths, - bool GetSPV) const { - auto TargetTriple = llvm::Triple(D.getTargetTriple()); + llvm::SmallVector, 4> &DeviceLibPaths) const { for (const auto &IC : InstallationCandidates) { llvm::SmallString<128> InstallLibPath(IC.str()); - if (GetSPV && TargetTriple.isOSWindows()) - InstallLibPath.append("/bin"); - else - InstallLibPath.append("/lib"); + InstallLibPath.append("/lib"); DeviceLibPaths.emplace_back(InstallLibPath); } - if (!GetSPV || !TargetTriple.isOSWindows()) - DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); + DeviceLibPaths.emplace_back(D.SysRoot + "/lib"); } void SYCLInstallationDetector::AddSYCLIncludeArgs( diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 2c9fcf47b455a..65941ae356549 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -129,8 +129,7 @@ class SYCLInstallationDetector { public: SYCLInstallationDetector(const Driver &D); void 
getSYCLDeviceLibPath( - llvm::SmallVector, 4> &DeviceLibPaths, - bool GetSPV = false) const; + llvm::SmallVector, 4> &DeviceLibPaths) const; void AddSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; void print(llvm::raw_ostream &OS) const; diff --git a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index df05d49b939b5..fb81abd4b7ef0 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -44,18 +44,21 @@ int main() { // CHECK-DAG: @.sycl_offloading.target.0 = internal unnamed_addr constant [7 x i8] c"spir64\00" // CHECK-DAG: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer // CHECK-DAG: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer -// CHECK-DAG: @prop = internal unnamed_addr constant [8 x i8] c"aspects\00" +// CHECK-DAG: @prop = internal unnamed_addr constant [17 x i8] c"DeviceLibReqMask\00" +// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr null, i32 1, i64 0 }] +// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [24 x i8] c"SYCL/devicelib req mask\00" +// CHECK-DAG: @prop.1 = internal unnamed_addr constant [8 x i8] c"aspects\00" // CHECK-DAG: @prop_val = internal unnamed_addr constant [8 x i8] zeroinitializer -// CHECK-DAG: @__sycl_offload_prop_sets_arr = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop, ptr @prop_val, i32 2, i64 8 }] -// CHECK-DAG: @SYCL_PropSetName = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" -// CHECK-DAG: @SYCL_PropSetName.1 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" -// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [2 x 
%_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.1, ptr null, ptr null }] +// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop.1, ptr @prop_val, i32 2, i64 8 }] +// CHECK-DAG: @SYCL_PropSetName.3 = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00" +// CHECK-DAG: @SYCL_PropSetName.4 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00" +// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }] // CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [772 x i8] // CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00" // CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }] // CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 772], section ".tgtimg", align 16 // CHECK-DAG: @llvm.used = appending global [1 x ptr] 
[ptr @.sycl_offloading.0.info], section "llvm.metadata" -// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([772 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 772), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([2 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 2) }] +// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([772 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 772), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] // CHECK-DAG: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null } // CHECK-DAG: @llvm.global_ctors = {{.*}} { i32 1, ptr @sycl.descriptor_reg, ptr null }] // CHECK-DAG: @llvm.global_dtors = {{.*}} { i32 1, ptr @sycl.descriptor_unreg, ptr null }] diff --git a/clang/test/Driver/sycl-post-link-options-win.cpp b/clang/test/Driver/sycl-post-link-options-win.cpp index 16ab31ef37891..137ee1a89de4b 100644 --- a/clang/test/Driver/sycl-post-link-options-win.cpp +++ 
b/clang/test/Driver/sycl-post-link-options-win.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx -### --target=x86_64-pc-windows-msvc -fsycl \ // RUN: -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-spv-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // ------- // Generate .o file as linker wrapper input. // diff --git a/clang/test/Driver/sycl-post-link-options.cpp b/clang/test/Driver/sycl-post-link-options.cpp index 57235a3cf97a4..8f8fe14325d47 100644 --- a/clang/test/Driver/sycl-post-link-options.cpp +++ b/clang/test/Driver/sycl-post-link-options.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl -### \ // RUN: --no-offload-new-driver -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-spv-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // // Generate .o file as 
linker wrapper input. // diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 90a2184801850..17a74c47d624c 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -730,16 +730,12 @@ runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args) { const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ); if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) { - std::string SYCLDeviceLibSPVLoc = SYCLDeviceLibLoc->getValue(); - llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); - if (HostTriple.isOSWindows()) - SYCLDeviceLibSPVLoc += "../bin"; - - std::string AssertSPVLoc = - SYCLDeviceLibSPVLoc + "/libsycl-fallback-cassert.spv"; - if (llvm::sys::fs::exists(AssertSPVLoc)) { - SYCLDeviceLibSPVLoc = "--device-lib-spv-dir=" + SYCLDeviceLibSPVLoc; - CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibSPVLoc))); + std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue(); + std::string AssertDeviceLibLoc = + SYCLDeviceLibLocParam + "/libsycl-fallback-cassert.bc"; + if (llvm::sys::fs::exists(AssertDeviceLibLoc)) { + SYCLDeviceLibLocParam = "--device-lib-dir=" + SYCLDeviceLibLocParam; + CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibLocParam))); } } getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e84faf464c42e..422e150efbc44 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -104,6 +104,11 @@ cl::opt OutputDir{ "Directory where files listed in the result file table will be output"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; +cl::opt DeviceLibDir{ + "device-lib-dir", + cl::desc("Directory where sycl fallback device 
libraries reside"), + cl::value_desc("dirname"), cl::cat(PostLinkCat)}; + struct TargetFilenamePair { std::string Target; std::string Filename; From fa9e6b5e8773c93f0a30595cb44fd0ead1be42e1 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 2 Dec 2024 19:01:24 +0800 Subject: [PATCH 30/37] Embed required devicelib modules as device image Signed-off-by: jinge90 --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.h | 1 + .../llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h | 6 +- llvm/include/llvm/Support/PropertySetIO.h | 1 + .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 1 + llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp | 74 +++++++++++++++++++ llvm/lib/Support/PropertySetIO.cpp | 1 + llvm/tools/sycl-post-link/sycl-post-link.cpp | 68 +++++++++++++---- 7 files changed, 138 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index e7cff6c730051..af2120d9dc702 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -12,6 +12,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/Support/PropertySetIO.h" #include namespace llvm { diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h index c9b737e2d053a..8f3718c60ea88 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include namespace llvm { @@ -39,5 +41,7 @@ enum class DeviceLibExt : std::uint32_t { }; uint32_t getSYCLDeviceLibReqMask(const Module &M); - +void getSYCLDeviceLibReqNames(unsigned int ReqMask, + std::vector &ReqNames); +unsigned int getSYCLDeviceLibMeta(std::string &DeviceLibFn); } // namespace llvm diff --git a/llvm/include/llvm/Support/PropertySetIO.h 
b/llvm/include/llvm/Support/PropertySetIO.h index 13cb687f3b08b..fbdeca7ff28cd 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -200,6 +200,7 @@ class PropertySetRegistry { static constexpr char SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[] = "SYCL/specialization constants default values"; static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; + static constexpr char SYCL_DEVICELIB_METADATA[] = "SYCL/devicelib metadata"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties"; diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index cfea28538017c..258c955755dff 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -160,6 +160,7 @@ PropSetRegTy computeModuleProperties(const Module &M, std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); } + { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, computeDeviceRequirements(M, EntryPoints).asMap()); diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp index 12914d3763521..4ecbe5fe615a5 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp @@ -757,6 +757,28 @@ uint32_t getDeviceLibBits(const std::string &FuncName) { } // namespace +static std::unordered_map> + SYCLDeviceLibFn = { + {DeviceLibExt::cl_intel_devicelib_assert, + {"libsycl-fallback-cassert.bc"}}, + {DeviceLibExt::cl_intel_devicelib_math, {"libsycl-fallback-cmath.bc"}}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + {"libsycl-fallback-cmath-fp64.bc"}}, + {DeviceLibExt::cl_intel_devicelib_complex, + {"libsycl-fallback-complex.bc"}}, + 
{DeviceLibExt::cl_intel_devicelib_complex_fp64, + {"libsycl-fallback-complex-fp64.bc"}}, + {DeviceLibExt::cl_intel_devicelib_cstring, + {"libsycl-fallback-cstring.bc"}}, + {DeviceLibExt::cl_intel_devicelib_imf, {"libsycl-fallback-imf.bc"}}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, + {"libsycl-fallback-imf-fp64.bc"}}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, + {"libsycl-fallback-imf-bf16.bc"}}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + {"libsycl-fallback-bfloat16.bc", "libsycl-native-bfloat16.bc"}}, +}; + // For each device image module, we go through all functions which meets // 1. The function name has prefix "__devicelib_" // 2. The function is declaration which means it doesn't have function body @@ -775,3 +797,55 @@ uint32_t llvm::getSYCLDeviceLibReqMask(const Module &M) { } return ReqMask; } + +void llvm::getSYCLDeviceLibReqNames(unsigned int ReqMask, + std::vector &ReqNames) { + DeviceLibExt DeviceLibExts[] = {DeviceLibExt::cl_intel_devicelib_assert, + DeviceLibExt::cl_intel_devicelib_math, + DeviceLibExt::cl_intel_devicelib_math_fp64, + DeviceLibExt::cl_intel_devicelib_complex, + DeviceLibExt::cl_intel_devicelib_complex_fp64, + DeviceLibExt::cl_intel_devicelib_cstring, + DeviceLibExt::cl_intel_devicelib_imf, + DeviceLibExt::cl_intel_devicelib_imf_fp64, + DeviceLibExt::cl_intel_devicelib_imf_bf16, + DeviceLibExt::cl_intel_devicelib_bfloat16}; + + unsigned int Temp; + for (auto Ext : DeviceLibExts) { + Temp = + 0x1 << (static_cast(Ext) - + static_cast(DeviceLibExt::cl_intel_devicelib_assert)); + if (Temp & ReqMask) { + for (auto Fn : SYCLDeviceLibFn[Ext]) + ReqNames.push_back(std::string(Fn)); + } + } +} + +unsigned int llvm::getSYCLDeviceLibMeta(std::string &DeviceLibFn) { + DeviceLibExt DeviceLibExts[] = {DeviceLibExt::cl_intel_devicelib_assert, + DeviceLibExt::cl_intel_devicelib_math, + DeviceLibExt::cl_intel_devicelib_math_fp64, + DeviceLibExt::cl_intel_devicelib_complex, + DeviceLibExt::cl_intel_devicelib_complex_fp64, + 
DeviceLibExt::cl_intel_devicelib_cstring, + DeviceLibExt::cl_intel_devicelib_imf, + DeviceLibExt::cl_intel_devicelib_imf_fp64, + DeviceLibExt::cl_intel_devicelib_imf_bf16, + DeviceLibExt::cl_intel_devicelib_bfloat16}; + + unsigned int DeviceLibMeta = 0; + for (auto Ext : DeviceLibExts) { + for (auto Fn : SYCLDeviceLibFn[Ext]) { + if (DeviceLibFn == Fn) { + DeviceLibMeta = static_cast(Ext); + if (DeviceLibFn == "libsycl-native-bfloat16.bc") + DeviceLibMeta |= 0x80000000ULL; + break; + } + } + } + + return DeviceLibMeta; +} diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index 2fe7cac00fb14..38e14f1cc494e 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -196,6 +196,7 @@ void PropertyValue::copy(const PropertyValue &P) { constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; +constexpr char PropertySetRegistry::SYCL_DEVICELIB_METADATA[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 422e150efbc44..3a74060a1cb81 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -12,7 +12,6 @@ // - module splitter to split a big input module into smaller ones // - specialization constant intrinsic transformation //===----------------------------------------------------------------------===// - #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" @@ -311,9 +310,20 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) { std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, - StringRef Suff, StringRef 
Target = "") { - auto PropSet = - computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + StringRef Suff, StringRef Target = "", + bool IsDeviceLib = false) { + PropSetRegTy PropSet; + + // For fallback devicelib module, no kernel included and no specialization + // constant used, skip regular Prop emit. + if (!IsDeviceLib) + PropSet = computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + else { + auto SYCLDeviceLibMeta = getSYCLDeviceLibMeta(MD.Name); + std::map RMEntry = { + {"DeviceLibMetaData", SYCLDeviceLibMeta}}; + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_METADATA, RMEntry); + } std::string NewSuff = Suff.str(); if (!Target.empty()) { @@ -421,17 +431,24 @@ void addTableRow(util::SimpleTable &Table, // IR component saving is skipped, and this file name is recorded as such in // the result. void saveModule(std::vector> &OutTables, - module_split::ModuleDesc &MD, int I, StringRef IRFilename) { + module_split::ModuleDesc &MD, int I, StringRef IRFilename, + bool IsDeviceLib = false) { IrPropSymFilenameTriple BaseTriple; StringRef Suffix = getModuleSuffix(MD); MD.saveSplitInformationAsMetadata(); - if (!IRFilename.empty()) { - // don't save IR, just record the filename - BaseTriple.Ir = IRFilename.str(); + if (!IsDeviceLib) { + if (!IRFilename.empty()) { + // don't save IR, just record the filename + BaseTriple.Ir = IRFilename.str(); + } else { + MD.cleanup(); + BaseTriple.Ir = saveModuleIR(MD.getModule(), I, Suffix); + } } else { - MD.cleanup(); + // For DeviceLib Modules, don't need to do clean up. 
BaseTriple.Ir = saveModuleIR(MD.getModule(), I, Suffix); } + if (DoSymGen) { // save the names of the entry points - the symbol table BaseTriple.Sym = saveModuleSymbolTable(MD, I, Suffix); @@ -445,13 +462,24 @@ void saveModule(std::vector> &OutTables, GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols, EmitImportedSymbols, DeviceGlobals}; - CopyTriple.Prop = - saveModuleProperties(MD, Props, I, Suffix, OutputFile.Target); + CopyTriple.Prop = saveModuleProperties(MD, Props, I, Suffix, + OutputFile.Target, IsDeviceLib); } addTableRow(*Table, CopyTriple); } } +void saveDeviceLibModule( + std::vector> &OutTables, + const std::string &IRFile, int I, LLVMContext &Context) { + SMDiagnostic Err; + StringRef DeviceLibLoc = DeviceLibDir; + std::string IRPath = DeviceLibLoc.str() + "/" + IRFile; + std::unique_ptr IRModule = parseIRFile(IRPath, Err, Context); + llvm::module_split::ModuleDesc LibIRMD(std::move(IRModule), IRFile); + saveModule(OutTables, LibIRMD, I, IRFile, true); +} + module_split::ModuleDesc link(module_split::ModuleDesc &&MD1, module_split::ModuleDesc &&MD2) { std::vector Names; @@ -747,7 +775,7 @@ bool isTargetCompatibleWithModule(const std::string &Target, } std::vector> -processInputModule(std::unique_ptr M) { +processInputModule(std::unique_ptr M, LLVMContext &Context) { // Construct the resulting table which will accumulate all the outputs. SmallVector ColumnTitles{ StringRef(COL_CODE)}; @@ -776,6 +804,9 @@ processInputModule(std::unique_ptr M) { // if none were made. bool Modified = false; + // Keeps track of required device libraries by all device images. + unsigned int DeviceLibReqMask = 0; + // Propagate ESIMD attribute to wrapper functions to prevent // spurious splits and kernel link errors. 
Modified |= runModulePass(*M); @@ -887,6 +918,7 @@ processInputModule(std::unique_ptr M) { "have been made\n"; } for (module_split::ModuleDesc &IrMD : MMs) { + DeviceLibReqMask |= getSYCLDeviceLibReqMask(IrMD.getModule()); saveModule(Tables, IrMD, ID, OutIRFileName); } @@ -895,12 +927,22 @@ processInputModule(std::unique_ptr M) { if (!MMsWithDefaultSpecConsts.empty()) { for (size_t i = 0; i != MMsWithDefaultSpecConsts.size(); ++i) { module_split::ModuleDesc &IrMD = MMsWithDefaultSpecConsts[i]; + DeviceLibReqMask |= getSYCLDeviceLibReqMask(IrMD.getModule()); saveModule(Tables, IrMD, ID, OutIRFileName); } ++ID; } } + + if ((DeviceLibReqMask > 0) && (DeviceLibDir.getNumOccurrences() > 0)) { + string_vector DeviceLibReqNames; + getSYCLDeviceLibReqNames(DeviceLibReqMask, DeviceLibReqNames); + for (auto Fn : DeviceLibReqNames) { + saveDeviceLibModule(Tables, Fn, ID, Context); + ++ID; + } + } return Tables; } @@ -1044,7 +1086,7 @@ int main(int argc, char **argv) { } std::vector> Tables = - processInputModule(std::move(M)); + processInputModule(std::move(M), Context); // Input module was processed and a single output file was requested. 
if (IROutputOnly) From c889b3002532c324c2b97eaaf525fbd1be828801 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 3 Dec 2024 10:26:21 +0800 Subject: [PATCH 31/37] fix typo Signed-off-by: jinge90 --- llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp index 4ecbe5fe615a5..6a4f612028f80 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp @@ -841,7 +841,7 @@ unsigned int llvm::getSYCLDeviceLibMeta(std::string &DeviceLibFn) { if (DeviceLibFn == Fn) { DeviceLibMeta = static_cast(Ext); if (DeviceLibFn == "libsycl-native-bfloat16.bc") - DeviceLibMeta |= 0x80000000ULL; + DeviceLibMeta |= 0x80000000; break; } } From 768c11094ed136a4be16565e5218c3a463b98160 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 3 Dec 2024 17:24:44 +0800 Subject: [PATCH 32/37] add device image map in ProgramManager Signed-off-by: jinge90 --- sycl/source/detail/compiler.hpp | 2 ++ sycl/source/detail/device_binary_image.cpp | 1 + sycl/source/detail/device_binary_image.hpp | 4 ++++ .../program_manager/program_manager.cpp | 23 ++++++++++++++++--- .../program_manager/program_manager.hpp | 4 ++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 35f313ceec3f5..d8fd7154ce73e 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -46,6 +46,8 @@ "SYCL/specialization constants default values" /// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h #define __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" +/// PropertySetRegistry::SYCL_DEVICELIB_METADATA defined in PropertySetIO.h +#define __SYCL_PROPERTY_SET_DEVICELIB_METADATA "SYCL/devicelib metadata" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h #define 
__SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" /// PropertySetRegistry::SYCL_KERNEL_PROGRAM_METADATA defined in PropertySetIO.h diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 2be48d4a38fce..80ccbe3daf619 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -180,6 +180,7 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { SpecConstDefaultValuesMap.init( Bin, __SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP); DeviceLibReqMask.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK); + DeviceLibMetaData.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_METADATA); KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED); ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA); diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 203427b89ca45..4fcbd59e78274 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -214,6 +214,9 @@ class RTDeviceBinaryImage { return SpecConstDefaultValuesMap; } const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } + const PropertyRange &getDeviceLibMetaData() const { + return DeviceLibMetaData; + } const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } @@ -246,6 +249,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; + RTDeviceBinaryImage::PropertyRange DeviceLibMetaData; RTDeviceBinaryImage::PropertyRange KernelParamOptInfo; RTDeviceBinaryImage::PropertyRange AssertUsed; RTDeviceBinaryImage::PropertyRange ProgramMetadata; diff --git a/sycl/source/detail/program_manager/program_manager.cpp 
b/sycl/source/detail/program_manager/program_manager.cpp index 58b6aa05afed6..8a1a0effb3a21 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -752,7 +752,8 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } -static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +static inline void +CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) @@ -1772,9 +1773,25 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - // Treat the image as empty one - if (EntriesB == EntriesE) + // Treat the image as empty one or devicelib images + if (EntriesB == EntriesE) { + std::unique_ptr Img = + std::make_unique(RawImg); + const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp = + Img->getDeviceLibMetaData(); + if (DeviceLibMetaProp.isAvailable()) { + std::lock_guard DeviceLibImagesGuard( + m_DeviceLibImagesMutex); + unsigned DeviceLibMetaData = + DeviceBinaryProperty(*(DeviceLibMetaProp.begin())).asUint32(); + // Add device library image to device image map only when current + // key has not been inserted. 
+ if (m_DeviceLibImages.find(DeviceLibMetaData) == + m_DeviceLibImages.end()) + m_DeviceLibImages[DeviceLibMetaData] = std::move(Img); + } continue; + } std::unique_ptr Img; if (isDeviceImageCompressed(RawImg)) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 75689aadfb290..2d999513791bf 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -343,6 +343,10 @@ class ProgramManager { /// identified by its name. using RTDeviceBinaryImageUPtr = std::unique_ptr; + std::mutex m_DeviceLibImagesMutex; + + std::unordered_map m_DeviceLibImages; + /// Maps names of kernels to their unique kernel IDs. /// TODO: Use std::unordered_set with transparent hash and equality functions /// when C++20 is enabled for the runtime library. From 57fb5430b3a7d4ecb548357e07b005426d70560f Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 4 Dec 2024 17:10:42 +0800 Subject: [PATCH 33/37] link device lib image when required Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 128 ++++++++++++++++-- .../program_manager/program_manager.hpp | 5 + 2 files changed, 121 insertions(+), 12 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index b418ac38302ed..bda3189ecc380 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1171,8 +1171,8 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, // TODO device libraries may use scpecialization constants, manifest files, etc. // To support that they need to be delivered in a different container - so that // sycl_device_binary_struct can be created for each of them. 
-static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, - ur_program_handle_t &Prog) { +static bool loadDeviceLibLegacy(const ContextImplPtr Context, const char *Name, + ur_program_handle_t &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, std::ifstream::in | std::ifstream::binary); @@ -1192,6 +1192,14 @@ static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, return Prog != nullptr; } +static bool loadDeviceLib(const ContextImplPtr Context, + ur_program_handle_t &Prog, + const unsigned char *DeviceLibImageBuffer, + size_t DeviceLibImageSize) { + Prog = createSpirvProgram(Context, DeviceLibImageBuffer, DeviceLibImageSize); + return Prog != nullptr; +} + // For each extension, a pair of library names. The first uses native support, // the second emulates functionality in software. static const std::map> @@ -1272,9 +1280,13 @@ static ur_result_t doCompile(const AdapterPtr &Adapter, static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, std::vector &Devices, - bool UseNativeLib) { + bool UseNativeLib, bool LegacyMode = true, + const unsigned char *DeviceLibImageBuffer = nullptr, + size_t DeviceLibImageSize = 0) { - auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); + const char *LibFileName = nullptr; + if (LegacyMode) + LibFileName = getDeviceLibFilename(Extension, UseNativeLib); auto LockedCache = Context->acquireCachedLibPrograms(); auto &CachedLibPrograms = LockedCache.get(); // Collect list of devices to compile the library for. Library was already @@ -1311,10 +1323,21 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, bool IsProgramCreated = !URProgram; // Create UR program for device lib if we don't have it yet. 
- if (!URProgram && !loadDeviceLib(Context, LibFileName, URProgram)) { - EraseProgramForDevices(); - throw exception(make_error_code(errc::build), - std::string("Failed to load ") + LibFileName); + if (LegacyMode) { + if (!URProgram && !loadDeviceLibLegacy(Context, LibFileName, URProgram)) { + EraseProgramForDevices(); + throw exception(make_error_code(errc::build), + std::string("Failed to load ") + LibFileName); + } + } else { + if (!URProgram && !loadDeviceLib(Context, URProgram, DeviceLibImageBuffer, + DeviceLibImageSize)) { + EraseProgramForDevices(); + const char *ExtStr = getDeviceLibExtensionStr(Extension); + throw exception( + make_error_code(errc::build), + std::string("Failed to load fallback device library for ") + ExtStr); + } } // Insert URProgram into the cache for all devices that we compiled it for. @@ -1573,9 +1596,9 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { } static std::vector -getDeviceLibPrograms(const ContextImplPtr Context, - std::vector &Devices, - uint32_t DeviceLibReqMask) { +getDeviceLibProgramsLegacy(const ContextImplPtr Context, + std::vector &Devices, + uint32_t DeviceLibReqMask) { std::vector Programs; std::pair RequiredDeviceLibExt[] = { @@ -1658,6 +1681,83 @@ getDeviceLibPrograms(const ContextImplPtr Context, return Programs; } +std::vector ProgramManager::getDeviceLibReqPrograms( + const ContextImplPtr Context, std::vector &Devices, + uint32_t DeviceLibReqMask) { + + std::vector Programs; + + // Check whether a specified extension is supported by ALL devices. 
+ auto checkExtForDevices = [&Context, &Devices](const char *ExtStr) -> bool { + bool ExtAvailable = true; + for (auto SingleDevice : Devices) { + std::string DevExtList = + Context->getPlatformImpl() + ->getDeviceImpl(SingleDevice) + ->get_device_info_string( + UrInfoCode::value); + if (DevExtList.npos == DevExtList.find(ExtStr)) { + ExtAvailable = false; + break; + } + } + return ExtAvailable; + }; + + const bool fp64Support = checkExtForDevices("cl_khr_fp64"); + + size_t Idx = 0; + std::vector ReqDeviceLibExts; + while (DeviceLibReqMask != 0) { + if (DeviceLibReqMask & 1) { + DeviceLibExt ExtReq = static_cast( + static_cast(DeviceLibExt::cl_intel_devicelib_assert) + Idx); + ReqDeviceLibExts.push_back(ExtReq); + } + ++Idx; + DeviceLibReqMask = DeviceLibReqMask >> 1; + } + + std::vector ReqExtMetaKeys; + for (auto Ext : ReqDeviceLibExts) { + if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || + Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64 || + Ext == DeviceLibExt::cl_intel_devicelib_imf_fp64) && + !fp64Support) { + continue; + } + auto ExtName = getDeviceLibExtensionStr(Ext); + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, ExtName) != nullptr; + } + bool ExtReqAvailable = checkExtForDevices(ExtName); + unsigned ExtMetaKey = static_cast(Ext); + if (ExtReqAvailable && !InhibitNativeImpl) { + if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) { + ExtMetaKey = ExtMetaKey | 0x80000000; + } else + continue; + } + ReqExtMetaKeys.push_back(ExtMetaKey); + } + + if (ReqExtMetaKeys.size() > 0) { + std::lock_guard DeviceLibImagesGuard(m_DeviceLibImagesMutex); + for (auto Key : ReqExtMetaKeys) { + if (m_DeviceLibImages.find(Key) != m_DeviceLibImages.end()) { + bool IsNative = ((Key & 0x80000000) > 0); + DeviceLibExt Ext = static_cast(Key & 0x7FFFFFFF); + Programs.push_back(loadDeviceLibFallback( + Context, Ext, Devices, IsNative, false, + 
m_DeviceLibImages[Key]->getRawData().BinaryStart, + m_DeviceLibImages[Key]->getSize())); + } + } + } + return Programs; +} + // Check if device image is compressed. static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { @@ -1691,7 +1791,11 @@ ProgramManager::ProgramPtr ProgramManager::build( std::vector LinkPrograms; if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, DeviceLibReqMask); + LinkPrograms = getDeviceLibReqPrograms(Context, Devices, DeviceLibReqMask); + if (LinkPrograms.size() == 0) { + LinkPrograms = + getDeviceLibProgramsLegacy(Context, Devices, DeviceLibReqMask); + } } static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 030ad898fbf88..ceee61261f010 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -217,6 +217,11 @@ class ProgramManager { uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); + std::vector + getDeviceLibReqPrograms(const ContextImplPtr Context, + std::vector &Devices, + uint32_t DeviceLibReqMask); + /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program. /// \param NativePrg the UR program associated with the kernel. 
From e88fb525aef6c5476c4bd7899e2487acbff7366e Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 6 Dec 2024 13:42:21 +0800 Subject: [PATCH 34/37] add __devicelib_* function call as imported symbol Signed-off-by: jinge90 --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.h | 3 +++ .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 27 ++++++++++++++++++- llvm/lib/SYCLLowerIR/ModuleSplitter.cpp | 3 +++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 15 +++++------ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index af2120d9dc702..5d0b8fbb2eed9 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -37,6 +37,9 @@ PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, const GlobalBinImageProps &GlobProps); +PropSetRegTy computeSYCLDeviceLibProperties(const Module &M, + std::string &SYCLDeviceLibName); + std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 258c955755dff..35833eec49415 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -150,6 +150,31 @@ std::optional getKernelSingleEltMetadata(const Function &Func, return std::nullopt; } +PropSetRegTy computeSYCLDeviceLibProperties(const Module &M, + std::string &SYCLDeviceLibName) { + PropSetRegTy PropSet; + + { + auto SYCLDeviceLibMeta = getSYCLDeviceLibMeta(SYCLDeviceLibName); + std::map RMEntry = { + {"DeviceLibMetaData", SYCLDeviceLibMeta}}; + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_METADATA, RMEntry); + } + + { + for (const auto &F : M.functions()) { + if (!F.getName().starts_with("__devicelib_") || F.isDeclaration()) + continue; + if (F.getCallingConv() == 
CallingConv::SPIR_FUNC) { + PropSet.add(PropSetRegTy::SYCL_EXPORTED_SYMBOLS, F.getName(), + /*PropVal=*/true); + } + } + } + + return PropSet; +} + PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, const GlobalBinImageProps &GlobProps) { @@ -454,7 +479,7 @@ PropSetRegTy computeModuleProperties(const Module &M, } PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, - "uses-virtual-functions-set", AllSets); + "uses-virtual-functions-set", AllSets); } } diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index 904424f93dae6..1617386c42b98 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -1402,6 +1402,9 @@ splitSYCLModule(std::unique_ptr M, ModuleSplitterSettings Settings) { } bool canBeImportedFunction(const Function &F) { + + if (F.getName().starts_with("__devicelib_") && F.isDeclaration()) + return true; // It may be theoretically possible to determine what is importable // based solely on function F, but the "SYCL/imported symbols" // property list MUST NOT have any imported symbols that are not supplied diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3a74060a1cb81..3497880acee9c 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -318,12 +318,8 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, // constant used, skip regular Prop emit. 
if (!IsDeviceLib) PropSet = computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); - else { - auto SYCLDeviceLibMeta = getSYCLDeviceLibMeta(MD.Name); - std::map RMEntry = { - {"DeviceLibMetaData", SYCLDeviceLibMeta}}; - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_METADATA, RMEntry); - } + else + PropSet = computeSYCLDeviceLibProperties(MD.getModule(), MD.Name); std::string NewSuff = Suff.str(); if (!Target.empty()) { @@ -459,9 +455,10 @@ void saveModule(std::vector> &OutTables, continue; auto CopyTriple = BaseTriple; if (DoPropGen) { - GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, - EmitExportedSymbols, EmitImportedSymbols, - DeviceGlobals}; + if (EmitImportedSymbols) + GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, + EmitExportedSymbols, EmitImportedSymbols, + DeviceGlobals}; CopyTriple.Prop = saveModuleProperties(MD, Props, I, Suffix, OutputFile.Target, IsDeviceLib); } From aba8d3099dfcce31788a9c032b495c2979b592c0 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 6 Dec 2024 14:55:59 +0800 Subject: [PATCH 35/37] Use sycl shared library mechanism to link required fallback device library Signed-off-by: jinge90 --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 5 - llvm/tools/sycl-post-link/sycl-post-link.cpp | 1 - .../program_manager/program_manager.cpp | 147 ++---------------- .../program_manager/program_manager.hpp | 9 -- 4 files changed, 16 insertions(+), 146 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 7a0967217b028..56736de52bef4 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -181,11 +181,6 @@ PropSetRegTy computeModuleProperties(const Module &M, const GlobalBinImageProps &GlobProps) { PropSetRegTy PropSet; - { - uint32_t MRMask = getSYCLDeviceLibReqMask(M); - std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; - 
PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); - } { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3497880acee9c..50a6cccc20789 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -455,7 +455,6 @@ void saveModule(std::vector> &OutTables, continue; auto CopyTriple = BaseTriple; if (DoPropGen) { - if (EmitImportedSymbols) GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols, EmitImportedSymbols, DeviceGlobals}; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 4f0d0fbadb07c..946ab357c8a35 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -768,8 +768,7 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } -static inline void -CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) @@ -1171,8 +1170,8 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, // TODO device libraries may use scpecialization constants, manifest files, etc. // To support that they need to be delivered in a different container - so that // sycl_device_binary_struct can be created for each of them. 
-static bool loadDeviceLibLegacy(const ContextImplPtr Context, const char *Name, - ur_program_handle_t &Prog) { +static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, + ur_program_handle_t &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, std::ifstream::in | std::ifstream::binary); @@ -1192,14 +1191,6 @@ static bool loadDeviceLibLegacy(const ContextImplPtr Context, const char *Name, return Prog != nullptr; } -static bool loadDeviceLib(const ContextImplPtr Context, - ur_program_handle_t &Prog, - const unsigned char *DeviceLibImageBuffer, - size_t DeviceLibImageSize) { - Prog = createSpirvProgram(Context, DeviceLibImageBuffer, DeviceLibImageSize); - return Prog != nullptr; -} - // For each extension, a pair of library names. The first uses native support, // the second emulates functionality in software. static const std::map> @@ -1280,13 +1271,9 @@ static ur_result_t doCompile(const AdapterPtr &Adapter, static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, std::vector &Devices, - bool UseNativeLib, bool LegacyMode = true, - const unsigned char *DeviceLibImageBuffer = nullptr, - size_t DeviceLibImageSize = 0) { + bool UseNativeLib) { - const char *LibFileName = nullptr; - if (LegacyMode) - LibFileName = getDeviceLibFilename(Extension, UseNativeLib); + auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); auto LockedCache = Context->acquireCachedLibPrograms(); auto &CachedLibPrograms = LockedCache.get(); // Collect list of devices to compile the library for. Library was already @@ -1323,21 +1310,10 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, bool IsProgramCreated = !URProgram; // Create UR program for device lib if we don't have it yet. 
- if (LegacyMode) { - if (!URProgram && !loadDeviceLibLegacy(Context, LibFileName, URProgram)) { - EraseProgramForDevices(); - throw exception(make_error_code(errc::build), - std::string("Failed to load ") + LibFileName); - } - } else { - if (!URProgram && !loadDeviceLib(Context, URProgram, DeviceLibImageBuffer, - DeviceLibImageSize)) { - EraseProgramForDevices(); - const char *ExtStr = getDeviceLibExtensionStr(Extension); - throw exception( - make_error_code(errc::build), - std::string("Failed to load fallback device library for ") + ExtStr); - } + if (!URProgram && !loadDeviceLib(Context, LibFileName, URProgram)) { + EraseProgramForDevices(); + throw exception(make_error_code(errc::build), + std::string("Failed to load ") + LibFileName); } // Insert URProgram into the cache for all devices that we compiled it for. @@ -1596,9 +1572,9 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { } static std::vector -getDeviceLibProgramsLegacy(const ContextImplPtr Context, - std::vector &Devices, - uint32_t DeviceLibReqMask) { +getDeviceLibPrograms(const ContextImplPtr Context, + std::vector &Devices, + uint32_t DeviceLibReqMask) { std::vector Programs; std::pair RequiredDeviceLibExt[] = { @@ -1681,83 +1657,6 @@ getDeviceLibProgramsLegacy(const ContextImplPtr Context, return Programs; } -std::vector ProgramManager::getDeviceLibReqPrograms( - const ContextImplPtr Context, std::vector &Devices, - uint32_t DeviceLibReqMask) { - - std::vector Programs; - - // Check whether a specified extension is supported by ALL devices. 
- auto checkExtForDevices = [&Context, &Devices](const char *ExtStr) -> bool { - bool ExtAvailable = true; - for (auto SingleDevice : Devices) { - std::string DevExtList = - Context->getPlatformImpl() - ->getDeviceImpl(SingleDevice) - ->get_device_info_string( - UrInfoCode::value); - if (DevExtList.npos == DevExtList.find(ExtStr)) { - ExtAvailable = false; - break; - } - } - return ExtAvailable; - }; - - const bool fp64Support = checkExtForDevices("cl_khr_fp64"); - - size_t Idx = 0; - std::vector ReqDeviceLibExts; - while (DeviceLibReqMask != 0) { - if (DeviceLibReqMask & 1) { - DeviceLibExt ExtReq = static_cast( - static_cast(DeviceLibExt::cl_intel_devicelib_assert) + Idx); - ReqDeviceLibExts.push_back(ExtReq); - } - ++Idx; - DeviceLibReqMask = DeviceLibReqMask >> 1; - } - - std::vector ReqExtMetaKeys; - for (auto Ext : ReqDeviceLibExts) { - if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_imf_fp64) && - !fp64Support) { - continue; - } - auto ExtName = getDeviceLibExtensionStr(Ext); - bool InhibitNativeImpl = false; - if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, ExtName) != nullptr; - } - bool ExtReqAvailable = checkExtForDevices(ExtName); - unsigned ExtMetaKey = static_cast(Ext); - if (ExtReqAvailable && !InhibitNativeImpl) { - if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) { - ExtMetaKey = ExtMetaKey | 0x80000000; - } else - continue; - } - ReqExtMetaKeys.push_back(ExtMetaKey); - } - - if (ReqExtMetaKeys.size() > 0) { - std::lock_guard DeviceLibImagesGuard(m_DeviceLibImagesMutex); - for (auto Key : ReqExtMetaKeys) { - if (m_DeviceLibImages.find(Key) != m_DeviceLibImages.end()) { - bool IsNative = ((Key & 0x80000000) > 0); - DeviceLibExt Ext = static_cast(Key & 0x7FFFFFFF); - Programs.push_back(loadDeviceLibFallback( - Context, Ext, Devices, IsNative, false, - 
m_DeviceLibImages[Key]->getRawData().BinaryStart, - m_DeviceLibImages[Key]->getSize())); - } - } - } - return Programs; -} - // Check if device image is compressed. static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { @@ -1791,11 +1690,7 @@ ProgramManager::ProgramPtr ProgramManager::build( std::vector LinkPrograms; if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibReqPrograms(Context, Devices, DeviceLibReqMask); - if (LinkPrograms.size() == 0) { - LinkPrograms = - getDeviceLibProgramsLegacy(Context, Devices, DeviceLibReqMask); - } + LinkPrograms = getDeviceLibPrograms(Context, Devices, DeviceLibReqMask); } static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); @@ -1913,24 +1808,14 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - // Treat the image as empty one or devicelib images + // Treat the image as empty one if (EntriesB == EntriesE) { std::unique_ptr Img = std::make_unique(RawImg); const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp = Img->getDeviceLibMetaData(); - if (DeviceLibMetaProp.isAvailable()) { - std::lock_guard DeviceLibImagesGuard( - m_DeviceLibImagesMutex); - unsigned DeviceLibMetaData = - DeviceBinaryProperty(*(DeviceLibMetaProp.begin())).asUint32(); - // Add device library image to device image map only when current - // key has not been inserted. 
- if (m_DeviceLibImages.find(DeviceLibMetaData) == - m_DeviceLibImages.end()) - m_DeviceLibImages[DeviceLibMetaData] = std::move(Img); - } - continue; + if (!DeviceLibMetaProp.isAvailable()) + continue; } std::unique_ptr Img; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 7a1d8d1b72380..abfdb1144105b 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -217,11 +217,6 @@ class ProgramManager { uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); - std::vector - getDeviceLibReqPrograms(const ContextImplPtr Context, - std::vector &Devices, - uint32_t DeviceLibReqMask); - /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program. /// \param NativePrg the UR program associated with the kernel. @@ -379,10 +374,6 @@ class ProgramManager { /// identified by its name. using RTDeviceBinaryImageUPtr = std::unique_ptr; - std::mutex m_DeviceLibImagesMutex; - - std::unordered_map m_DeviceLibImages; - /// Maps names of kernels to their unique kernel IDs. /// TODO: Use std::unordered_set with transparent hash and equality functions /// when C++20 is enabled for the runtime library. 
From 1cc7a0dc2a88f23ad341f312551c0c91f1393a7b Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 6 Dec 2024 17:25:03 +0800 Subject: [PATCH 36/37] link required fallback spv as shared libraries Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 100 +++++++++++++----- 1 file changed, 73 insertions(+), 27 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 946ab357c8a35..00300b620c194 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -603,6 +603,74 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, return (0 == SuitableImageID); } +// For each extension understood by the SYCL runtime, the string representation +// of its name. Names with devicelib in them are internal to the runtime. Others +// are actual OpenCL extensions. +static const std::map DeviceLibExtensionStrs = { + {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, + {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + "cl_intel_devicelib_math_fp64"}, + {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, + "cl_intel_devicelib_complex_fp64"}, + {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, + {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + "cl_intel_bfloat16_conversions"}}; + +static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { + auto Ext = DeviceLibExtensionStrs.find(Extension); + if (Ext == DeviceLibExtensionStrs.end()) + throw exception(make_error_code(errc::build), + "Unhandled (new?) 
device library extension"); + return Ext->second; +} + +static bool skipFallbackSYCLDeviceLib(RTDeviceBinaryImage *Img, + const device &Dev) { + const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp = + Img->getDeviceLibMetaData(); + unsigned DeviceLibMetaData = + DeviceBinaryProperty(*(DeviceLibMetaProp.begin())).asUint32(); + DeviceLibExt LibExt = + static_cast(DeviceLibMetaData & 0x7FFFFFFF); + + std::vector DeviceExtensions = + Dev.get_info(); + + bool FP64Supported = + (std::find(DeviceExtensions.begin(), DeviceExtensions.end(), + "cl_khr_fp64") != DeviceExtensions.end()); + if ((LibExt == DeviceLibExt::cl_intel_devicelib_math_fp64 || + LibExt == DeviceLibExt::cl_intel_devicelib_complex_fp64 || + LibExt == DeviceLibExt::cl_intel_devicelib_imf_fp64) && + !FP64Supported) + return true; + + const char *ExtName = getDeviceLibExtensionStr(LibExt); + bool NativeSupported = + (std::find(DeviceExtensions.begin(), DeviceExtensions.end(), ExtName) != + DeviceExtensions.end()); + + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, ExtName) != nullptr; + } + + if (NativeSupported && !InhibitNativeImpl) + return true; + + if (LibExt == DeviceLibExt::cl_intel_devicelib_bfloat16) { + bool IsNative = ((DeviceLibMetaData & 0x80000000) != 0); + if (IsNative != NativeSupported) + return true; + } + return false; +} + static bool checkLinkingSupport(const device &Dev, const RTDeviceBinaryImage &Img) { const char *Target = Img.getRawData().DeviceTargetSpec; @@ -659,6 +727,9 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( !doesDevSupportDeviceRequirements(Dev, *Img) || !compatibleWithDevice(Img, Dev)) continue; + if (Img->getDeviceLibMetaData().isAvailable() && + skipFallbackSYCLDeviceLib(Img, Dev)) + continue; DeviceImagesToLink.insert(Img); Found = true; for (const sycl_device_binary_property &ISProp : @@ -768,7 +839,8 @@ setSpecializationConstants(const std::shared_ptr 
&InputImpl, } } -static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +static inline void +CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) @@ -1227,32 +1299,6 @@ static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) { return Lib; } -// For each extension understood by the SYCL runtime, the string representation -// of its name. Names with devicelib in them are internal to the runtime. Others -// are actual OpenCL extensions. -static const std::map DeviceLibExtensionStrs = { - {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, - {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, - {DeviceLibExt::cl_intel_devicelib_math_fp64, - "cl_intel_devicelib_math_fp64"}, - {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, - {DeviceLibExt::cl_intel_devicelib_complex_fp64, - "cl_intel_devicelib_complex_fp64"}, - {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, - {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, - {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, - {DeviceLibExt::cl_intel_devicelib_bfloat16, - "cl_intel_bfloat16_conversions"}}; - -static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { - auto Ext = DeviceLibExtensionStrs.find(Extension); - if (Ext == DeviceLibExtensionStrs.end()) - throw exception(make_error_code(errc::build), - "Unhandled (new?) 
device library extension"); - return Ext->second; -} - static ur_result_t doCompile(const AdapterPtr &Adapter, ur_program_handle_t Program, uint32_t NumDevs, ur_device_handle_t *Devs, ur_context_handle_t Ctx, From 84cd4da2a47f1cbb8af37a10b9a7b29a4acda87e Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 21 Jan 2025 10:35:12 +0800 Subject: [PATCH 37/37] sync with latest intel/llvm Signed-off-by: jinge90 --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3ab4d670c0f8b..a15add8482ca1 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -464,9 +464,9 @@ void saveModule(std::vector> &OutTables, continue; auto CopyTriple = BaseTriple; if (DoPropGen) { - GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, - EmitExportedSymbols, EmitImportedSymbols, - DeviceGlobals}; + GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, + EmitExportedSymbols, EmitImportedSymbols, + DeviceGlobals}; CopyTriple.Prop = saveModuleProperties(MD, Props, I, Suffix, OutputFile.Target, IsDeviceLib); }