diff --git a/sycl/include/sycl/detail/kernel_name_str_t.hpp b/sycl/include/sycl/detail/kernel_name_str_t.hpp new file mode 100644 index 0000000000000..77481d75eeef9 --- /dev/null +++ b/sycl/include/sycl/detail/kernel_name_str_t.hpp @@ -0,0 +1,28 @@ +//==---------- kernel_name_str_t.hpp ----- Kernel name type aliases --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +using KernelNameStrT = std::string_view; +using KernelNameStrRefT = std::string_view; +using ABINeutralKernelNameStrT = detail::string_view; +#else +using KernelNameStrT = std::string; +using KernelNameStrRefT = const std::string &; +using ABINeutralKernelNameStrT = detail::string; +#endif + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/string.hpp b/sycl/include/sycl/detail/string.hpp index 28caa74d37357..63cd9fb81a620 100644 --- a/sycl/include/sycl/detail/string.hpp +++ b/sycl/include/sycl/detail/string.hpp @@ -5,12 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once #include #include -#pragma once - namespace sycl { inline namespace _V1 { namespace detail { @@ -58,6 +57,8 @@ class string { } const char *c_str() const noexcept { return str ? str : ""; } + const char *data() const noexcept { return c_str(); } + bool empty() { return str ? str[0] : false; } friend bool operator==(const string &lhs, std::string_view rhs) noexcept { return rhs == lhs.c_str(); diff --git a/sycl/include/sycl/detail/string_view.hpp b/sycl/include/sycl/detail/string_view.hpp index d36301efbfdac..00770210fa4e9 100644 --- a/sycl/include/sycl/detail/string_view.hpp +++ b/sycl/include/sycl/detail/string_view.hpp @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once -#include +#include -#pragma once +#include namespace sycl { inline namespace _V1 { @@ -26,6 +27,7 @@ class string_view { string_view(const string_view &strn) noexcept = default; string_view(string_view &&strn) noexcept = default; string_view(std::string_view strn) noexcept : str(strn.data()) {} + string_view(const sycl::detail::string &strn) noexcept : str(strn.c_str()) {} string_view &operator=(string_view &&strn) noexcept = default; string_view &operator=(const string_view &strn) noexcept = default; @@ -35,7 +37,12 @@ class string_view { return *this; } - const char *data() const noexcept { return str; } + string_view &operator=(const sycl::detail::string &strn) noexcept { + str = strn.c_str(); + return *this; + } + + const char *data() const noexcept { return str ? str : ""; } friend bool operator==(string_view lhs, std::string_view rhs) noexcept { return rhs == lhs.data(); diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 349a6939dd628..74e6161ae6196 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -504,7 +505,7 @@ class __SYCL_EXPORT handler { bool IsKernelCreatedFromSource, bool IsESIMD); /// \return a string containing name of SYCL kernel. - detail::string getKernelName(); + detail::ABINeutralKernelNameStrT getKernelName(); template bool lambdaAndKernelHaveEqualName() { // TODO It is unclear a kernel and a lambda/functor must to be equal or not @@ -514,7 +515,7 @@ class __SYCL_EXPORT handler { // values of arguments for the kernel. assert(MKernel && "MKernel is not initialized"); const std::string LambdaName = detail::getKernelName(); - detail::string KernelName = getKernelName(); + detail::ABINeutralKernelNameStrT KernelName = getKernelName(); return KernelName == LambdaName; } @@ -3418,7 +3419,7 @@ class __SYCL_EXPORT handler { std::shared_ptr MQueue; std::vector MLocalAccStorage; std::vector> MStreamStorage; - detail::string MKernelName; + detail::ABINeutralKernelNameStrT MKernelName; /// Storage for a sycl::kernel object. std::shared_ptr MKernel; /// Pointer to the source host memory or accessor(depending on command type). diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 8d8a04b673b36..c4647ebc53e66 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -256,7 +256,7 @@ class CGExecKernel : public CG { std::shared_ptr MSyclKernel; std::shared_ptr MKernelBundle; std::vector MArgs; - std::string MKernelName; + KernelNameStrT MKernelName; std::vector> MStreams; std::vector> MAuxiliaryResources; /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list @@ -271,7 +271,7 @@ class CGExecKernel : public CG { std::shared_ptr SyclKernel, std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, - std::string KernelName, + KernelNameStrT KernelName, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -293,7 +293,7 @@ class CGExecKernel : public CG { CGExecKernel(const CGExecKernel &CGExec) = default; const std::vector &getArguments() const { return MArgs; } - const std::string &getKernelName() const { return MKernelName; } + KernelNameStrRefT getKernelName() const { return MKernelName; } const std::vector> &getStreams() const { return MStreams; } diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 4b4d454475eb7..062fc65a4380c 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -142,6 +142,9 @@ class ManagedDeviceBinaries { sycl_device_binaries MBinaries; }; +using MangledKernelNameMapT = std::map>; +using KernelNameSetT = std::set>; + // Information unique to images compiled at runtime through the // ext_oneapi_kernel_compiler extension. struct KernelCompilerBinaryInfo { @@ -152,13 +155,12 @@ struct KernelCompilerBinaryInfo { : MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {} KernelCompilerBinaryInfo(syclex::source_language Lang, - std::set &&KernelNames) + KernelNameSetT &&KernelNames) : MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {} KernelCompilerBinaryInfo( - syclex::source_language Lang, std::set &&KernelNames, - std::unordered_map &&MangledKernelNames, - std::string &&Prefix, + syclex::source_language Lang, KernelNameSetT &&KernelNames, + MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix, std::shared_ptr &&DeviceGlobalRegistry) : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, MMangledKernelNames{std::move(MangledKernelNames)}, @@ -221,8 +223,8 @@ struct KernelCompilerBinaryInfo { } syclex::source_language MLanguage; - std::set MKernelNames; - std::unordered_map MMangledKernelNames; + KernelNameSetT MKernelNames; + MangledKernelNameMapT MMangledKernelNames; std::string MPrefix; include_pairs_t MIncludePairs; std::vector> @@ -278,7 +280,7 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, const std::vector &Devices, bundle_state State, ur_program_handle_t Program, syclex::source_language Lang, - std::set &&KernelNames) + KernelNameSetT &&KernelNames) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::make_shared>()), @@ -292,9 +294,8 @@ class device_image_impl { const RTDeviceBinaryImage *BinImage, const context &Context, const std::vector &Devices, bundle_state State, std::shared_ptr> &&KernelIDs, - syclex::source_language Lang, std::set &&KernelNames, - std::unordered_map &&MangledKernelNames, - std::string &&Prefix, + syclex::source_language Lang, KernelNameSetT &&KernelNames, + MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix, std::shared_ptr &&DeviceGlobalRegistry) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(nullptr), @@ -337,8 +338,7 @@ class device_image_impl { device_image_impl(const context &Context, const std::vector &Devices, bundle_state State, ur_program_handle_t Program, - syclex::source_language Lang, - std::set &&KernelNames) + syclex::source_language Lang, KernelNameSetT &&KernelNames) : MBinImage(static_cast(nullptr)), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), @@ -594,9 +594,9 @@ class device_image_impl { } } - std::string adjustKernelName(const std::string &Name) const { + std::string adjustKernelName(std::string_view Name) const { if (!MRTCBinInfo.has_value()) - return Name; + return Name.data(); if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { auto It = MRTCBinInfo->MMangledKernelNames.find(Name); @@ -604,7 +604,7 @@ class device_image_impl { return It->second; } - return Name; + return Name.data(); } bool hasKernelName(const std::string &Name) const { @@ -614,7 +614,7 @@ class device_image_impl { } std::shared_ptr tryGetSourceBasedKernel( - const std::string &Name, const context &Context, + std::string_view Name, const context &Context, const std::shared_ptr &OwnerBundle, const std::shared_ptr &Self) const { if (!(getOriginMask() & ImageOriginKernelCompiler)) @@ -768,8 +768,8 @@ class device_image_impl { std::vector> Result; Result.reserve(NewImages.size()); for (auto &[NewImage, KernelIDs] : NewImages) { - std::set KernelNames; - std::unordered_map MangledKernelNames; + KernelNameSetT KernelNames; + MangledKernelNameMapT MangledKernelNames; std::unordered_set DeviceGlobalIDSet; std::vector DeviceGlobalIDVec; std::vector DeviceGlobalNames; @@ -970,7 +970,7 @@ class device_image_impl { &KernelNamesStr[0], nullptr); std::vector KernelNames = detail::split_string(KernelNamesStr, ';'); - std::set KernelNameSet{KernelNames.begin(), KernelNames.end()}; + KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()}; // If caching enabled and kernel not fetched from cache, cache. if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache && diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 915d8ba8a338c..d62b57bef05df 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -795,7 +795,7 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, CGExec->MLine, CGExec->MColumn); auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, - CGExec->MKernelName.c_str(), nullptr, CGExec->MNDRDesc, + CGExec->MKernelName.data(), nullptr, CGExec->MNDRDesc, CGExec->MKernelBundle, CGExec->MArgs); if (CmdTraceEvent) sycl::detail::emitInstrumentationGeneral( @@ -1352,12 +1352,12 @@ void exec_graph_impl::update(std::shared_ptr GraphImpl) { sycl::detail::CGExecKernel *TargetCGExec = static_cast( MNodeStorage[i]->MCommandGroup.get()); - const std::string &TargetKernelName = TargetCGExec->getKernelName(); + KernelNameStrRefT TargetKernelName = TargetCGExec->getKernelName(); sycl::detail::CGExecKernel *SourceCGExec = static_cast( GraphImpl->MNodeStorage[i]->MCommandGroup.get()); - const std::string &SourceKernelName = SourceCGExec->getKernelName(); + KernelNameStrRefT SourceKernelName = SourceCGExec->getKernelName(); if (TargetKernelName.compare(SourceKernelName) != 0) { std::stringstream ErrorStream( diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index ec4d4b20763d8..22996c571b810 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -662,7 +662,7 @@ updatePromotedArgs(const ::jit_compiler::SYCLKernelInfo &FusedKernelInfo, ur_kernel_handle_t jit_compiler::materializeSpecConstants( const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage, - const std::string &KernelName, + KernelNameStrRefT KernelName, const std::vector &SpecConstBlob) { #ifndef _WIN32 if (!BinImage) { @@ -712,7 +712,7 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants( ::jit_compiler::option::JITTargetFeatures::set(TargetFeaturesOpt)); auto MaterializerResult = - MaterializeSpecConstHandle(KernelName.c_str(), BinInfo, SpecConstBlob); + MaterializeSpecConstHandle(KernelName.data(), BinInfo, SpecConstBlob); if (MaterializerResult.failed()) { std::string Message{"Compilation for kernel failed with message:\n"}; Message.append(MaterializerResult.getErrorMessage()); @@ -802,7 +802,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, assert(KernelCmd->isFusable()); auto *KernelCG = static_cast(&CG); - auto &KernelName = KernelCG->MKernelName; + KernelNameStrRefT KernelName = KernelCG->MKernelName; if (KernelName.empty()) { printPerformanceWarning( "Cannot fuse kernel with invalid kernel function name"); @@ -810,7 +810,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, } auto [DeviceImage, Program] = - retrieveKernelBinary(Queue, KernelName.c_str(), KernelCG); + retrieveKernelBinary(Queue, KernelName.data(), KernelCG); if (!DeviceImage || !Program) { printPerformanceWarning("No suitable IR available for fusion"); return nullptr; @@ -914,7 +914,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, SYCLTypeToIndices(CurrentNDR.GlobalOffset)}; Ranges.push_back(JITCompilerNDR); - InputKernelInfo.emplace_back(KernelName.c_str(), ArgDescriptor, + InputKernelInfo.emplace_back(KernelName.data(), ArgDescriptor, JITCompilerNDR, BinInfo); // Collect information for the fused kernel diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index ba5911820bc78..9add14141fe2b 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #if SYCL_EXT_JIT_ENABLE #include @@ -46,7 +47,7 @@ class jit_compiler { ur_kernel_handle_t materializeSpecConstants(const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage, - const std::string &KernelName, + KernelNameStrRefT KernelName, const std::vector &SpecConstBlob); std::pair compileSYCL( diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 3cac4e5b2001f..12ac9a47b7c07 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -758,7 +759,7 @@ class kernel_bundle_impl { } std::shared_ptr - tryGetKernel(const std::string &Name, + tryGetKernel(detail::KernelNameStrRefT Name, const std::shared_ptr &Self) const { // TODO: For source-based kernels, it may be faster to keep a map between // {kernel_name, device} and their corresponding image. diff --git a/sycl/source/detail/kernel_id_impl.hpp b/sycl/source/detail/kernel_id_impl.hpp index 2512e173eaad4..9478797c38a74 100644 --- a/sycl/source/detail/kernel_id_impl.hpp +++ b/sycl/source/detail/kernel_id_impl.hpp @@ -8,6 +8,8 @@ #pragma once +#include + namespace sycl { inline namespace _V1 { namespace detail { @@ -31,14 +33,12 @@ struct EqualByNameComp { // identificator class kernel_id_impl { public: - kernel_id_impl(std::string Name) : MName(std::move(Name)) {} + kernel_id_impl(KernelNameStrT Name) : MName(std::move(Name)) {} kernel_id_impl(){}; const char *get_name() { return MName.data(); } - const std::string &get_name_string() { return MName; } - private: - std::string MName; + KernelNameStrT MName; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 8e45fa0ef0555..1eaad23c71a50 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -93,6 +93,13 @@ bool kernel_impl::hasSYCLMetadata() const noexcept { sycl::ext::oneapi::experimental::source_language::sycl)); } +// TODO this is how kernel_impl::get_info should behave instead. +std::string_view kernel_impl::getName() const { + if (MName.empty()) + MName = get_info(); + return MName; +} + bool kernel_impl::isBuiltInKernel(const device &Device) const { auto BuiltInKernels = Device.get_info(); if (BuiltInKernels.empty()) diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index cbef05f34fb27..8506dac0f85d0 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -237,6 +237,7 @@ class kernel_impl { const KernelArgMask *getKernelArgMask() const { return MKernelArgMaskPtr; } std::mutex *getCacheMutex() const { return MCacheMutex; } + std::string_view getName() const; private: ur_kernel_handle_t MKernel = nullptr; @@ -249,6 +250,7 @@ class kernel_impl { std::mutex MNoncacheableEnqueueMutex; const KernelArgMask *MKernelArgMaskPtr; std::mutex *MCacheMutex = nullptr; + mutable std::string MName; bool isBuiltInKernel(const device &Device) const; void checkIfValidForNumArgsInfoQuery() const; diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 6d50ff3f1d4df..a9c4cf0c41886 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -224,13 +224,13 @@ class KernelProgramCache { using KernelBuildResultPtr = std::shared_ptr; using KernelByNameT = - ::boost::unordered_map; + ::boost::unordered_map; using KernelCacheT = ::boost::unordered_map; using KernelFastCacheKeyT = std::pair; using KernelFastCacheValT = @@ -415,8 +415,7 @@ class KernelProgramCache { } std::pair - getOrInsertKernel(ur_program_handle_t Program, - const std::string &KernelName) { + getOrInsertKernel(ur_program_handle_t Program, KernelNameStrRefT KernelName) { auto LockedCache = acquireKernelsPerProgramCache(); auto &Cache = LockedCache.get()[Program]; auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7f8e8e8c6b02f..f88a8e9ad6ffb 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -833,7 +833,7 @@ CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { // its ref count incremented. ur_program_handle_t ProgramManager::getBuiltURProgram( const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, - const std::string &KernelName, const NDRDescT &NDRDesc) { + KernelNameStrRefT KernelName, const NDRDescT &NDRDesc) { DeviceImplPtr RootDevImpl; ur_bool_t MustBuildOnSubdevice = true; @@ -1094,7 +1094,7 @@ std::tuple ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, - const std::string &KernelName, + KernelNameStrRefT KernelName, const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get() @@ -1130,7 +1130,7 @@ ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, const AdapterPtr &Adapter = ContextImpl->getAdapter(); Adapter->call( - Program, KernelName.c_str(), &Kernel); + Program, KernelName.data(), &Kernel); // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it. if (ContextImpl->getPlatformImpl()->supports_usm()) { @@ -1516,7 +1516,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( } RTDeviceBinaryImage & -ProgramManager::getDeviceImage(const std::string &KernelName, +ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, const ContextImplPtr &ContextImpl, const device &Device) { if constexpr (DbgProgMgr > 0) { @@ -1560,7 +1560,7 @@ ProgramManager::getDeviceImage(const std::string &KernelName, } throw exception(make_error_code(errc::runtime), - "No kernel named " + KernelName + " was found"); + "No kernel named " + std::string(KernelName) + " was found"); } RTDeviceBinaryImage &ProgramManager::getDeviceImage( @@ -1835,7 +1835,7 @@ void ProgramManager::cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img) { } std::optional -ProgramManager::kernelImplicitLocalArgPos(const std::string &KernelName) const { +ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { auto it = m_KernelImplicitLocalArgPos.find(KernelName); if (it != m_KernelImplicitLocalArgPos.end()) return it->second; @@ -2312,7 +2312,7 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { const KernelArgMask * ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, - const std::string &KernelName) { + KernelNameStrRefT KernelName) { // Bail out if there are no eliminated kernel arg masks in our images if (m_EliminatedKernelArgMasks.empty()) return nullptr; @@ -2365,7 +2365,7 @@ static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { } std::optional -ProgramManager::tryGetSYCLKernelID(const std::string &KernelName) { +ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); auto KernelID = m_KernelName2KernelIDs.find(KernelName); @@ -2375,7 +2375,7 @@ ProgramManager::tryGetSYCLKernelID(const std::string &KernelName) { return KernelID->second; } -kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { +kernel_id ProgramManager::getSYCLKernelID(KernelNameStrRefT KernelName) { if (std::optional MaybeKernelID = tryGetSYCLKernelID(KernelName)) return *MaybeKernelID; throw exception(make_error_code(errc::runtime), @@ -2397,13 +2397,13 @@ std::vector ProgramManager::getAllSYCLKernelIDs() { std::vector AllKernelIDs; AllKernelIDs.reserve(m_KernelName2KernelIDs.size()); - for (std::pair KernelID : m_KernelName2KernelIDs) { + for (std::pair KernelID : m_KernelName2KernelIDs) { AllKernelIDs.push_back(KernelID.second); } return AllKernelIDs; } -kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) { +kernel_id ProgramManager::getBuiltInKernelID(KernelNameStrRefT KernelName) { std::lock_guard BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex); auto KernelID = m_BuiltInKernelIDs.find(KernelName); @@ -3150,7 +3150,7 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, // its ref count incremented. std::tuple ProgramManager::getOrCreateKernel(const context &Context, - const std::string &KernelName, + KernelNameStrRefT KernelName, const property_list &PropList, ur_program_handle_t Program) { @@ -3168,7 +3168,7 @@ ProgramManager::getOrCreateKernel(const context &Context, ur_kernel_handle_t Kernel = nullptr; const AdapterPtr &Adapter = Ctx->getAdapter(); - Adapter->call(Program, KernelName.c_str(), + Adapter->call(Program, KernelName.data(), &Kernel); // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it. @@ -3213,7 +3213,7 @@ ProgramManager::getOrCreateKernel(const context &Context, } ur_kernel_handle_t ProgramManager::getCachedMaterializedKernel( - const std::string &KernelName, + KernelNameStrRefT KernelName, const std::vector &SpecializationConsts) { if constexpr (DbgProgMgr > 0) std::cerr << ">>> ProgramManager::getCachedMaterializedKernel\n" @@ -3244,7 +3244,7 @@ ur_kernel_handle_t ProgramManager::getCachedMaterializedKernel( ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( const RTDeviceBinaryImage &Img, const context &Context, - const device &Device, const std::string &KernelName, + const device &Device, KernelNameStrRefT KernelName, const std::vector &SpecializationConsts) { // Check if we already have the kernel in the cache. if constexpr (DbgProgMgr > 0) @@ -3278,7 +3278,7 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( ExtraProgramsToLink); ur_kernel_handle_t UrKernel{nullptr}; Adapter->call( - BuildProgram.get(), KernelName.c_str(), &UrKernel); + BuildProgram.get(), KernelName.data(), &UrKernel); { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); m_MaterializedKernels[KernelName][SpecializationConsts] = UrKernel; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 906692cdcac25..e0ccabef2860e 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -134,7 +134,7 @@ class ProgramManager { // process. Can only be called after staticInit is done. static ProgramManager &getInstance(); - RTDeviceBinaryImage &getDeviceImage(const std::string &KernelName, + RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, const ContextImplPtr &ContextImpl, const device &Device); @@ -178,7 +178,7 @@ class ProgramManager { /// \param KernelName the kernel's name ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, - const std::string &KernelName, + KernelNameStrRefT KernelName, const NDRDescT &NDRDesc = {}); /// Builds a program from a given set of images or retrieves that program from @@ -202,16 +202,15 @@ class ProgramManager { ur_program_handle_t> getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, - const std::string &KernelName, - const NDRDescT &NDRDesc = {}); + KernelNameStrRefT KernelName, const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( - const std::string &KernelName, + KernelNameStrRefT KernelName, const std::vector &SpecializationConsts); ur_kernel_handle_t getOrCreateMaterializedKernel( const RTDeviceBinaryImage &Img, const context &Context, - const device &Device, const std::string &KernelName, + const device &Device, KernelNameStrRefT KernelName, const std::vector &SpecializationConsts); ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, @@ -232,17 +231,16 @@ class ProgramManager { /// within the native program. /// \param NativePrg the UR program associated with the kernel. /// \param KernelName the name of the kernel. - const KernelArgMask * - getEliminatedKernelArgMask(ur_program_handle_t NativePrg, - const std::string &KernelName); + const KernelArgMask *getEliminatedKernelArgMask(ur_program_handle_t NativePrg, + KernelNameStrRefT KernelName); // The function returns the unique SYCL kernel identifier associated with a // kernel name or nullopt if there is no such ID. - std::optional tryGetSYCLKernelID(const std::string &KernelName); + std::optional tryGetSYCLKernelID(KernelNameStrRefT KernelName); // The function returns the unique SYCL kernel identifier associated with a // kernel name or throws a sycl exception if there is no such ID. - kernel_id getSYCLKernelID(const std::string &KernelName); + kernel_id getSYCLKernelID(KernelNameStrRefT KernelName); // The function returns a vector containing all unique SYCL kernel identifiers // in SYCL device images. @@ -250,7 +248,7 @@ class ProgramManager { // The function returns the unique SYCL kernel identifier associated with a // built-in kernel name. - kernel_id getBuiltInKernelID(const std::string &KernelName); + kernel_id getBuiltInKernelID(KernelNameStrRefT KernelName); // The function inserts or initializes a device_global entry into the // device_global map. @@ -354,7 +352,7 @@ class ProgramManager { const property_list &PropList); std::tuple - getOrCreateKernel(const context &Context, const std::string &KernelName, + getOrCreateKernel(const context &Context, KernelNameStrRefT KernelName, const property_list &PropList, ur_program_handle_t Program); ProgramManager(); @@ -368,7 +366,7 @@ class ProgramManager { SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } std::optional - kernelImplicitLocalArgPos(const std::string &KernelName) const; + kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -420,7 +418,7 @@ class ProgramManager { /// when C++20 is enabled for the runtime library. /// Access must be guarded by the m_KernelIDsMutex mutex. // - std::unordered_map m_KernelName2KernelIDs; + std::unordered_map m_KernelName2KernelIDs; // Maps KernelIDs to device binary images. There can be more than one image // in case of SPIRV + AOT. @@ -450,12 +448,13 @@ class ProgramManager { /// in the sycl::detail::__sycl_service_kernel__ namespace which is /// exclusively used for this purpose. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap m_ServiceKernels; + std::unordered_multimap + m_ServiceKernels; /// Caches all exported symbols to allow faster lookup when excluding these // from kernel bundles. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ExportedSymbolImages; /// Keeps all device images we are refering to during program lifetime. Used @@ -465,7 +464,7 @@ class ProgramManager { /// Maps names of built-in kernels to their unique kernel IDs. /// Access must be guarded by the m_BuiltInKernelIDsMutex mutex. - std::unordered_map m_BuiltInKernelIDs; + std::unordered_map m_BuiltInKernelIDs; /// Caches list of device images that use or provide virtual functions from /// the same set. Used to simplify access. @@ -496,7 +495,8 @@ class ProgramManager { /// Protects NativePrograms that can be changed by class' methods. std::mutex MNativeProgramsMutex; - using KernelNameToArgMaskMap = std::unordered_map; + using KernelNameToArgMaskMap = + std::unordered_map; /// Maps binary image and kernel name pairs to kernel argument masks which /// specify which arguments were eliminated during device code optimization. std::unordered_map @@ -510,15 +510,15 @@ class ProgramManager { // different types without temporary key_type object creation. This includes // standard overloads, such as comparison between std::string and // std::string_view or just char*. - using KernelUsesAssertSet = std::set>; + using KernelUsesAssertSet = std::set>; KernelUsesAssertSet m_KernelUsesAssert; - std::unordered_map m_KernelImplicitLocalArgPos; + std::unordered_map m_KernelImplicitLocalArgPos; // Sanitizer type used in device image SanitizerType m_SanitizerFoundInImage; // Maps between device_global identifiers and associated information. - std::unordered_map> + std::unordered_map> m_DeviceGlobals; std::unordered_map m_Ptr2DeviceGlobal; @@ -535,7 +535,7 @@ class ProgramManager { using MaterializedEntries = std::map, ur_kernel_handle_t>; - std::unordered_map m_MaterializedKernels; + std::unordered_map m_MaterializedKernels; // Holds bfloat16 device library images, the 1st element is for fallback // version and 2nd is for native version. These bfloat16 device library diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 9955855d5e2e4..a461d0127ad4b 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -882,7 +882,7 @@ class queue_impl { KernelUsesAssert = (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && ProgramManager::getInstance().kernelUsesAssert( - Handler.MKernelName.c_str()); + Handler.MKernelName.data()); auto Event = MIsInorder ? finalizeHandlerInOrder(Handler) : finalizeHandlerOutOfOrder(Handler); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0bfa89e3dfb80..bf0a061fbee35 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1981,7 +1981,7 @@ std::string instrumentationGetKernelName( void instrumentationAddExtraKernelMetadata( xpti_td *&CmdTraceEvent, const NDRDescT &NDRDesc, const std::shared_ptr &KernelBundleImplPtr, - const std::string &KernelName, + KernelNameStrRefT KernelName, const std::shared_ptr &SyclKernel, const QueueImplPtr &Queue, std::vector &CGArgs) // CGArgs are not const since they could be @@ -2391,7 +2391,7 @@ static ur_result_t SetKernelParamsAndLaunch( const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, - const std::string &KernelName) { + KernelNameStrRefT KernelName) { assert(Queue && "Kernel submissions should have an associated queue"); const AdapterPtr &Adapter = Queue->getAdapter(); @@ -2660,7 +2660,7 @@ void enqueueImpKernel( const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector &Args, const std::shared_ptr &KernelBundleImplPtr, const std::shared_ptr &MSyclKernel, - const std::string &KernelName, std::vector &RawEvents, + KernelNameStrRefT KernelName, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, @@ -3238,7 +3238,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { const std::shared_ptr &SyclKernel = ExecKernel->MSyclKernel; - const std::string &KernelName = ExecKernel->MKernelName; + KernelNameStrRefT KernelName = ExecKernel->MKernelName; if (!EventImpl) { // Kernel only uses assert if it's non interop one @@ -3253,7 +3253,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { const RTDeviceBinaryImage *BinImage = nullptr; if (detail::SYCLConfig::get()) { std::tie(BinImage, std::ignore) = - retrieveKernelBinary(MQueue, KernelName.c_str()); + retrieveKernelBinary(MQueue, KernelName.data()); assert(BinImage && "Failed to obtain a binary image."); } enqueueImpKernel(MQueue, NDRDesc, Args, ExecKernel->getKernelBundle(), diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 2ab553159f1dc..a602f7c4b373c 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -624,7 +624,7 @@ void enqueueImpKernel( const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector &Args, const std::shared_ptr &KernelBundleImplPtr, const std::shared_ptr &MSyclKernel, - const std::string &KernelName, std::vector &RawEvents, + KernelNameStrRefT KernelName, std::vector &RawEvents, const detail::EventImplPtr &Event, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c36ff2acbb21a..eb52b490efd71 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -589,7 +589,7 @@ void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) { ur_kernel_handle_t Scheduler::completeSpecConstMaterialization( [[maybe_unused]] const QueueImplPtr &Queue, [[maybe_unused]] const RTDeviceBinaryImage *BinImage, - [[maybe_unused]] const std::string &KernelName, + [[maybe_unused]] KernelNameStrRefT KernelName, [[maybe_unused]] std::vector &SpecConstBlob) { #if SYCL_EXT_JIT_ENABLE && !_WIN32 return detail::jit_compiler::get_instance().materializeSpecConstants( diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 0cb7dda96d33a..5b657c1f13b93 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -453,7 +454,7 @@ class Scheduler { ur_kernel_handle_t completeSpecConstMaterialization( const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage, - const std::string &KernelName, std::vector &SpecConstBlob); + KernelNameStrRefT KernelName, std::vector &SpecConstBlob); void releaseResources(BlockingT Blocking = BlockingT::BLOCKING); bool isDeferredMemObjectsEmpty(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5fd688713acc1..4ef49f1388ebd 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -433,13 +433,13 @@ event handler::finalize() { !(MKernel && MKernel->isInterop()) && (KernelBundleImpPtr->empty() || KernelBundleImpPtr->hasSYCLOfflineImages()) && - !KernelBundleImpPtr->tryGetKernel(MKernelName.c_str(), + !KernelBundleImpPtr->tryGetKernel(MKernelName.data(), KernelBundleImpPtr)) { auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( - MKernelName.c_str()); + MKernelName.data()); bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev); // If kernel was not inserted and the bundle is in input mode we try // building it and trying to find the kernel in executable mode @@ -502,7 +502,7 @@ event handler::finalize() { bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && detail::ProgramManager::getInstance().kernelUsesAssert( - MKernelName.c_str()); + MKernelName.data()); DiscardEvent = !KernelUsesAssert; } @@ -510,7 +510,7 @@ event handler::finalize() { // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent, int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( - StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, MKernelName.c_str(), + StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, MKernelName.data(), MQueue, impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent, InstanceID = InstanceID]() { @@ -524,12 +524,12 @@ event handler::finalize() { const detail::RTDeviceBinaryImage *BinImage = nullptr; if (detail::SYCLConfig::get()) { std::tie(BinImage, std::ignore) = - detail::retrieveKernelBinary(MQueue, MKernelName.c_str()); + detail::retrieveKernelBinary(MQueue, MKernelName.data()); assert(BinImage && "Failed to obtain a binary image."); } enqueueImpKernel( MQueue, impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, + MKernelName.data(), RawEvents, DiscardEvent ? detail::EventImplPtr{} : LastEventImpl, nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, @@ -577,7 +577,7 @@ event handler::finalize() { CommandGroup.reset(new detail::CGExecKernel( std::move(impl->MNDRDesc), std::move(MHostKernel), std::move(MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), - std::move(impl->MArgs), MKernelName.c_str(), std::move(MStreamStorage), + std::move(impl->MArgs), MKernelName.data(), std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, @@ -1198,8 +1198,8 @@ void handler::extractArgsAndReqsFromLambda( // Calling methods of kernel_impl requires knowledge of class layout. // As this is impossible in header, there's a function that calls necessary // method inside the library and returns the result. -detail::string handler::getKernelName() { - return detail::string{MKernel->get_info()}; +detail::ABINeutralKernelNameStrT handler::getKernelName() { + return MKernel->getName(); } void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index b76826f8c318f..3be3b4932004d 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -126,6 +126,7 @@ // CHECK-NEXT: CL/cl_platform.h // CHECK-NEXT: CL/cl_ext.h // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/reduction_forward.hpp // CHECK-NEXT: detail/ur.hpp // CHECK-NEXT: ur_api_funcs.def diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 52f8c5ff3589f..2316f7e5f8066 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -17,7 +17,8 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_KernelIDs2BinImage; } - std::unordered_map &getKernelName2KernelID() { + std::unordered_map & + getKernelName2KernelID() { return m_KernelName2KernelIDs; } @@ -27,12 +28,14 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_BinImg2KernelIDs; } - std::unordered_multimap & + std::unordered_multimap & getServiceKernels() { return m_ServiceKernels; } - std::unordered_multimap & + std::unordered_multimap & getExportedSymbolImages() { return m_ExportedSymbolImages; } @@ -57,16 +60,17 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return NativePrograms; } - std::unordered_map< - const sycl::detail::RTDeviceBinaryImage *, - std::unordered_map> & + std::unordered_map> & getEliminatedKernelArgMask() { return m_EliminatedKernelArgMasks; } KernelUsesAssertSet &getKernelUsesAssert() { return m_KernelUsesAssert; } - std::unordered_map &getKernelImplicitLocalArgPos() { + std::unordered_map & + getKernelImplicitLocalArgPos() { return m_KernelImplicitLocalArgPos; } @@ -81,7 +85,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_Ptr2HostPipe; } - std::unordered_map> & getDeviceGlobals() { return m_DeviceGlobals; diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index a290a68542bdb..9d5795b3ddda7 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -129,7 +129,7 @@ class MockHandler : public sycl::handler { std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), - CGH->MKernelName.c_str(), std::move(CGH->MStreamStorage), + CGH->MKernelName.data(), std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), impl->MCGType, {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, CGH->MCodeLoc)); diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp index d0b26bbd7663b..0c004dc876826 100644 --- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp +++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp @@ -27,7 +27,8 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { sycl::accessor B; - MockCGH.single_task([=]() { + constexpr size_t KernelSize = sizeof(B); + MockCGH.single_task>([=]() { int size = B.size(); (void)size; }); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 3eed6ccef4970..0d03ae68d2d17 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -271,7 +272,7 @@ class MockHandler : public sycl::handler { return impl->CGData.MEvents; } std::vector &getArgs() { return impl->MArgs; } - std::string getKernelName() { return MKernelName.c_str(); } + std::string getKernelName() { return MKernelName.data(); } std::shared_ptr &getKernel() { return MKernel; } std::shared_ptr &getHostTask() { return impl->MHostTask;