From 7a259f088541aa25acd1bd2b7a10f195d945445b Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 19 Aug 2025 13:22:22 +0200 Subject: [PATCH 01/25] [SYCL] Refactor kernel name based cache approach --- .../detail/get_kernel_name_based_data.hpp | 38 +++++ .../sycl/detail/kernel_name_based_cache.hpp | 29 ---- .../include/sycl/detail/kernel_name_str_t.hpp | 2 + sycl/include/sycl/handler.hpp | 9 +- sycl/source/CMakeLists.txt | 3 +- sycl/source/detail/cg.hpp | 6 +- ...che.cpp => get_kernel_name_based_data.cpp} | 13 +- sycl/source/detail/global_handler.cpp | 17 ++- sycl/source/detail/global_handler.hpp | 13 +- sycl/source/detail/graph/graph_impl.cpp | 5 +- sycl/source/detail/handler_impl.hpp | 2 +- sycl/source/detail/kernel_name_based_data.cpp | 56 +++++++ ...cache_t.hpp => kernel_name_based_data.hpp} | 47 ++++-- sycl/source/detail/kernel_program_cache.hpp | 52 ++----- .../program_manager/program_manager.cpp | 141 +++++++++--------- .../program_manager/program_manager.hpp | 29 ++-- sycl/source/detail/queue_impl.hpp | 4 +- sycl/source/detail/scheduler/commands.cpp | 38 +++-- sycl/source/detail/scheduler/commands.hpp | 4 +- sycl/source/handler.cpp | 35 ++++- sycl/test/abi/sycl_symbols_linux.dump | 4 +- sycl/test/abi/sycl_symbols_windows.dump | 4 +- .../include_deps/sycl_detail_core.hpp.cpp | 4 +- .../arg_mask/EliminatedArgMask.cpp | 2 +- .../scheduler/SchedulerTestUtils.hpp | 2 +- .../scheduler/StreamInitDependencyOnHost.cpp | 2 +- 26 files changed, 334 insertions(+), 227 deletions(-) create mode 100644 sycl/include/sycl/detail/get_kernel_name_based_data.hpp delete mode 100644 sycl/include/sycl/detail/kernel_name_based_cache.hpp rename sycl/source/detail/{kernel_name_based_cache.cpp => get_kernel_name_based_data.cpp} (58%) create mode 100644 sycl/source/detail/kernel_name_based_data.cpp rename sycl/source/detail/{kernel_name_based_cache_t.hpp => kernel_name_based_data.hpp} (66%) diff --git a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp new file mode 100644 index 000000000000..f13dc15a327e --- /dev/null +++ b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp @@ -0,0 +1,38 @@ +//==--------------------- get_kernel_name_based_data.hpp -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +class KernelNameBasedCacheT; +__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif + +class KernelNameBasedData; + +__SYCL_EXPORT KernelNameBasedData * +getKernelNameBasedDataImpl(detail::ABINeutralKernelNameStrRefT KernelName); + +// Retrieves and caches a data pointer to avoid kernel name based lookup +// overhead. +template +KernelNameBasedData * +getKernelNameBasedData(detail::ABINeutralKernelNameStrRefT KernelName) { + static KernelNameBasedData *Instance = getKernelNameBasedDataImpl(KernelName); + return Instance; +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_name_based_cache.hpp b/sycl/include/sycl/detail/kernel_name_based_cache.hpp deleted file mode 100644 index 6bd2e38edc8e..000000000000 --- a/sycl/include/sycl/detail/kernel_name_based_cache.hpp +++ /dev/null @@ -1,29 +0,0 @@ -//==--------------------- kernel_name_based_cache.hpp ----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - -struct KernelNameBasedCacheT; -__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); - -// Retrieves a cache pointer unique to a kernel name type that can be used to -// avoid kernel name based lookup in the runtime. -template -KernelNameBasedCacheT *getKernelNameBasedCache() { - static KernelNameBasedCacheT *Instance = createKernelNameBasedCache(); - return Instance; -} - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_name_str_t.hpp b/sycl/include/sycl/detail/kernel_name_str_t.hpp index e0079ffb09c7..b8ceb395431d 100644 --- a/sycl/include/sycl/detail/kernel_name_str_t.hpp +++ b/sycl/include/sycl/detail/kernel_name_str_t.hpp @@ -18,10 +18,12 @@ namespace detail { using KernelNameStrT = std::string_view; using KernelNameStrRefT = std::string_view; using ABINeutralKernelNameStrT = detail::string_view; +using ABINeutralKernelNameStrRefT = detail::string_view; #else using KernelNameStrT = std::string; using KernelNameStrRefT = const std::string &; using ABINeutralKernelNameStrT = detail::string; +using ABINeutralKernelNameStrRefT = const detail::string &; #endif inline KernelNameStrT toKernelNameStrT(const ABINeutralKernelNameStrT &str) { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 0185c611bec5..909a958ff181 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -14,11 +14,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -863,6 +863,8 @@ class __SYCL_EXPORT handler { constexpr std::string_view KernelNameStr = detail::getKernelName(); MKernelName = KernelNameStr; + setKernelNameBasedDataPtr( + detail::getKernelNameBasedData(KernelNameStr)); } else { // In case w/o the integration header it is necessary to process // accessors from the list(which are associated with this handler) as @@ -870,7 +872,6 @@ class __SYCL_EXPORT handler { // later during finalize. setArgsToAssociatedAccessors(); } - setKernelNameBasedCachePtr(detail::getKernelNameBasedCache()); // If the kernel lambda is callable with a kernel_handler argument, manifest // the associated kernel handler. @@ -3685,8 +3686,12 @@ class __SYCL_EXPORT handler { sycl::handler &h, size_t size, const ext::oneapi::experimental::memory_pool &pool); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelNameBasedCachePtr( detail::KernelNameBasedCacheT *KernelNameBasedCachePtr); +#endif + void setKernelNameBasedDataPtr( + detail::KernelNameBasedData *KernelNameBasedDataPtr); queue getQueue(); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 24a471eacb0f..93bb4b3a6812 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -288,7 +288,8 @@ set(SYCL_COMMON_SOURCES "detail/kernel_compiler/kernel_compiler_opencl.cpp" "detail/kernel_compiler/kernel_compiler_sycl.cpp" "detail/kernel_impl.cpp" - "detail/kernel_name_based_cache.cpp" + "detail/get_kernel_name_based_data.cpp" + "detail/kernel_name_based_data.cpp" "detail/kernel_program_cache.cpp" "detail/memory_export.cpp" "detail/memory_manager.cpp" diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 87e7b088951a..29ed9213c315 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -254,7 +254,7 @@ class CGExecKernel : public CG { std::shared_ptr MKernelBundle; std::vector MArgs; KernelNameStrT MKernelName; - KernelNameBasedCacheT *MKernelNameBasedCachePtr; + KernelNameBasedData *MKernelNameBasedDataPtr; std::vector> MStreams; std::vector> MAuxiliaryResources; /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list @@ -270,7 +270,7 @@ class CGExecKernel : public CG { std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, KernelNameStrT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameBasedData *KernelNameBasedDataPtr, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -280,7 +280,7 @@ class CGExecKernel : public CG { MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), MKernelName(std::move(KernelName)), - MKernelNameBasedCachePtr(KernelNameBasedCachePtr), + MKernelNameBasedDataPtr(KernelNameBasedDataPtr), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), diff --git a/sycl/source/detail/kernel_name_based_cache.cpp b/sycl/source/detail/get_kernel_name_based_data.cpp similarity index 58% rename from sycl/source/detail/kernel_name_based_cache.cpp rename to sycl/source/detail/get_kernel_name_based_data.cpp index 17356e7f38fc..5e8aa6c3f540 100644 --- a/sycl/source/detail/kernel_name_based_cache.cpp +++ b/sycl/source/detail/get_kernel_name_based_data.cpp @@ -1,4 +1,4 @@ -//==--------------------- kernel_name_based_cache.cpp ----------------------==// +//==-------------------- get_kernel_name_based_data.cpp --------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,15 +7,24 @@ //===----------------------------------------------------------------------===// #include -#include +#include +#include namespace sycl { inline namespace _V1 { namespace detail { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *createKernelNameBasedCache() { return GlobalHandler::instance().createKernelNameBasedCache(); } +#endif + +KernelNameBasedData * +getKernelNameBasedDataImpl(detail::ABINeutralKernelNameStrRefT KernelName) { + return ProgramManager::getInstance().getOrCreateKernelNameBasedData( + KernelName.data()); +} } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 66b181a9bc0e..47f6cba89d8c 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -249,12 +249,15 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { return TP; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() { - static std::deque &KernelNameBasedCaches = - getOrCreate(MKernelNameBasedCaches); - LockGuard LG{MKernelNameBasedCaches.Lock}; - return &KernelNameBasedCaches.emplace_back(); + static std::deque &KernelNameBasedDataStorage = + getOrCreate(MKernelNameBasedDataStorage); + LockGuard LG{MKernelNameBasedDataStorage.Lock}; + return reinterpret_cast( + &KernelNameBasedDataStorage.emplace_back()); } +#endif void GlobalHandler::releaseDefaultContexts() { // Release shared-pointers to SYCL objects. @@ -390,9 +393,11 @@ void shutdown_late() { Handler->MScheduler.Inst.reset(nullptr); Handler->MProgramManager.Inst.reset(nullptr); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Cache stores handles to the adapter, so clear it before // releasing adapters. - Handler->MKernelNameBasedCaches.Inst.reset(nullptr); + Handler->MKernelNameBasedDataStorage.Inst.reset(nullptr); +#endif // Clear the adapters and reset the instance if it was there. Handler->unloadAdapters(); diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 5be68ef06572..9d330007d3eb 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -11,7 +11,9 @@ #include #include +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES #include +#endif #include #include @@ -27,7 +29,10 @@ class adapter_impl; class ods_target_list; class XPTIRegistry; class ThreadPool; -struct KernelNameBasedCacheT; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +class KernelNameBasedCacheT; +class KernelNameBasedData; +#endif /// Wrapper class for global data structures with non-trivial destructors. /// @@ -73,7 +78,9 @@ class GlobalHandler { ods_target_list &getOneapiDeviceSelectorTargets(const std::string &InitValue); XPTIRegistry &getXPTIRegistry(); ThreadPool &getHostTaskThreadPool(); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif static void registerStaticVarShutdownHandler(); bool isOkToDefer() const; @@ -130,7 +137,9 @@ class GlobalHandler { InstWithLock MXPTIRegistry; // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; - InstWithLock> MKernelNameBasedCaches; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + InstWithLock> MKernelNameBasedDataStorage; +#endif }; } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index b72057cc30ae..52dd77067ebb 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -741,7 +741,7 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( CGExec->MLine, CGExec->MColumn); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, - CGExec->MKernelName.data(), CGExec->MKernelNameBasedCachePtr, nullptr, + CGExec->MKernelName.data(), CGExec->MKernelNameBasedDataPtr, nullptr, CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs); if (CmdTraceEvent) sycl::detail::emitInstrumentationGeneral( @@ -1573,9 +1573,10 @@ void exec_graph_impl::populateURKernelUpdateStructs( UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { + assert(ExecCG.MKernelNameBasedDataPtr); BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, ExecCG.MKernelName, - ExecCG.MKernelNameBasedCachePtr); + *ExecCG.MKernelNameBasedDataPtr); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index edf32dfa80f7..b5b718ea89cb 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -244,7 +244,7 @@ class handler_impl { bool MKernelHasSpecialCaptures = true; // A pointer to a kernel name based cache retrieved on the application side. - KernelNameBasedCacheT *MKernelNameBasedCachePtr = nullptr; + KernelNameBasedData *MKernelNameBasedDataPtr = nullptr; }; } // namespace detail diff --git a/sycl/source/detail/kernel_name_based_data.cpp b/sycl/source/detail/kernel_name_based_data.cpp new file mode 100644 index 000000000000..a525d9d7969e --- /dev/null +++ b/sycl/source/detail/kernel_name_based_data.cpp @@ -0,0 +1,56 @@ +//==---------------------- kernel_name_based_data.cpp ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +KernelNameBasedData::KernelNameBasedData(KernelNameStrRefT KernelName) { + init(KernelName); +} + +void KernelNameBasedData::init(KernelNameStrRefT KernelName) { + auto &PM = detail::ProgramManager::getInstance(); + MUsesAssert = PM.kernelUsesAssert(KernelName); + MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + MInitialized.store(true); +#endif +} + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void KernelNameBasedData::initIfNeeded(KernelNameStrRefT KernelName) { + if (!MInitialized.load()) + init(KernelName); +} +#endif + +FastKernelSubcacheT &KernelNameBasedData::getKernelSubcache() { + assertInitialized(); + return MFastKernelSubcache; +} +bool KernelNameBasedData::usesAssert() { + assertInitialized(); + return MUsesAssert; +} +const std::optional &KernelNameBasedData::getImplicitLocalArgPos() { + assertInitialized(); + return MImplicitLocalArgPos; +} + +void KernelNameBasedData::assertInitialized() { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + assert(MInitialized.load() && "Cache needs to be initialized before use"); +#endif +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl \ No newline at end of file diff --git a/sycl/source/detail/kernel_name_based_cache_t.hpp b/sycl/source/detail/kernel_name_based_data.hpp similarity index 66% rename from sycl/source/detail/kernel_name_based_cache_t.hpp rename to sycl/source/detail/kernel_name_based_data.hpp index e7c9b049a319..08c4a89caef3 100644 --- a/sycl/source/detail/kernel_name_based_cache_t.hpp +++ b/sycl/source/detail/kernel_name_based_data.hpp @@ -1,4 +1,4 @@ -//==-------------------- kernel_name_based_cache_t.hpp ---------------------==// +//==---------------------- kernel_name_based_data.hpp ----------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -23,9 +24,9 @@ using FastKernelCacheKeyT = std::pair; struct FastKernelCacheVal { Managed MKernelHandle; /* UR kernel. */ - std::mutex *MMutex; /* Mutex guarding this kernel. When - caching is disabled, the pointer is - nullptr. */ + std::mutex *MMutex; /* Mutex guarding this kernel. When + caching is disabled, the pointer is + nullptr. */ const KernelArgMask *MKernelArgMask; /* Eliminated kernel argument mask. */ Managed MProgramHandle; /* UR program handle corresponding to this kernel. */ @@ -76,13 +77,37 @@ struct FastKernelSubcacheT { FastKernelSubcacheMutexT Mutex; }; -struct KernelNameBasedCacheT { - FastKernelSubcacheT FastKernelSubcache; - std::optional UsesAssert; - // Implicit local argument position is represented by an optional int, this - // uses another optional on top of that to represent lazy initialization of - // the cached value. - std::optional> ImplicitLocalArgPos; +// This class is used for aggregating kernel name based information. +// Pointers to instances of this class are stored in header function templates +// as a static variable to avoid repeated runtime lookup overhead. + +// TODO Currently this class duplicates information fetched from the program +// manager. Instead, we should merge all of the kernel name based information +// into this structure and get rid of the other KernelName -> * maps. +class KernelNameBasedData { +public: +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + KernelNameBasedData() = default; +#endif + KernelNameBasedData(KernelNameStrRefT KernelName); + + void init(KernelNameStrRefT KernelName); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + void initIfNeeded(KernelNameStrRefT KernelName); +#endif + FastKernelSubcacheT &getKernelSubcache(); + bool usesAssert(); + const std::optional &getImplicitLocalArgPos(); + +private: + void assertInitialized(); + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + std::atomic MInitialized = false; +#endif + FastKernelSubcacheT MFastKernelSubcache; + bool MUsesAssert; + std::optional MImplicitLocalArgPos; }; } // namespace detail diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index e8bf8e5bba04..1d5e45aa52a4 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -11,7 +11,7 @@ #include "sycl/exception.hpp" #include #include -#include +#include #include #include #include @@ -219,25 +219,18 @@ class KernelProgramCache { class FastKernelSubcacheWrapper { public: - FastKernelSubcacheWrapper(FastKernelSubcacheT *CachePtr, + FastKernelSubcacheWrapper(FastKernelSubcacheT &Subcache, ur_context_handle_t UrContext) - : MSubcachePtr{CachePtr}, MUrContext{UrContext} { - if (!MSubcachePtr) { - MOwnsSubcache = true; - MSubcachePtr = new FastKernelSubcacheT(); - } - } + : MSubcachePtr{&Subcache}, MUrContext{UrContext} {} FastKernelSubcacheWrapper(const FastKernelSubcacheWrapper &) = delete; FastKernelSubcacheWrapper(FastKernelSubcacheWrapper &&Other) - : MSubcachePtr{Other.MSubcachePtr}, MOwnsSubcache{Other.MOwnsSubcache}, - MUrContext{Other.MUrContext} { + : MSubcachePtr{Other.MSubcachePtr}, MUrContext{Other.MUrContext} { Other.MSubcachePtr = nullptr; } FastKernelSubcacheWrapper & operator=(const FastKernelSubcacheWrapper &) = delete; FastKernelSubcacheWrapper &operator=(FastKernelSubcacheWrapper &&Other) { MSubcachePtr = Other.MSubcachePtr; - MOwnsSubcache = Other.MOwnsSubcache; MUrContext = Other.MUrContext; Other.MSubcachePtr = nullptr; return *this; @@ -247,11 +240,6 @@ class KernelProgramCache { if (!MSubcachePtr) return; - if (MOwnsSubcache) { - delete MSubcachePtr; - return; - } - // Single subcache might be used by different contexts. // Remove all entries from the subcache that are associated with the // current context. @@ -267,8 +255,7 @@ class KernelProgramCache { FastKernelSubcacheT &get() { return *MSubcachePtr; } private: - FastKernelSubcacheT *MSubcachePtr = nullptr; - bool MOwnsSubcache = false; + FastKernelSubcacheT *MSubcachePtr; ur_context_handle_t MUrContext = nullptr; }; @@ -455,18 +442,9 @@ class KernelProgramCache { FastKernelCacheValPtr tryToGetKernelFast(KernelNameStrRefT KernelName, ur_device_handle_t Device, - FastKernelSubcacheT *KernelSubcacheHint) { - FastKernelCacheWriteLockT Lock(MFastKernelCacheMutex); - if (!KernelSubcacheHint) { - auto It = MFastKernelCache.try_emplace( - KernelName, - FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext())); - KernelSubcacheHint = &It.first->second.get(); - } - - const FastKernelSubcacheEntriesT &SubcacheEntries = - KernelSubcacheHint->Entries; - FastKernelSubcacheReadLockT SubcacheLock{KernelSubcacheHint->Mutex}; + FastKernelSubcacheT &KernelSubcache) { + const FastKernelSubcacheEntriesT &SubcacheEntries = KernelSubcache.Entries; + FastKernelSubcacheReadLockT SubcacheLock{KernelSubcache.Mutex}; ur_context_handle_t Context = getURContext(); const FastKernelCacheKeyT RequiredKey(Device, Context); // Search for the kernel in the subcache. @@ -484,7 +462,7 @@ class KernelProgramCache { void saveKernel(KernelNameStrRefT KernelName, ur_device_handle_t Device, const FastKernelCacheValPtr &CacheVal, - FastKernelSubcacheT *KernelSubcacheHint) { + FastKernelSubcacheT &KernelSubcache) { if (SYCLConfig:: isProgramCacheEvictionEnabled()) { // Save kernel in fast cache only if the corresponding program is also @@ -504,15 +482,13 @@ class KernelProgramCache { // if no insertion took place, then some other thread has already inserted // smth in the cache traceKernel("Kernel inserted.", KernelName, true); - auto It = MFastKernelCache.try_emplace( - KernelName, - FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext())); - KernelSubcacheHint = &It.first->second.get(); + MFastKernelCache.try_emplace( + KernelName, FastKernelSubcacheWrapper(KernelSubcache, getURContext())); - FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcacheHint->Mutex}; + FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcache.Mutex}; ur_context_handle_t Context = getURContext(); - KernelSubcacheHint->Entries.emplace_back( - FastKernelCacheKeyT(Device, Context), CacheVal); + KernelSubcache.Entries.emplace_back(FastKernelCacheKeyT(Device, Context), + CacheVal); } // Expects locked program cache diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 77f28a5131f8..13e6971330bb 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1084,8 +1084,8 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, FastKernelCacheValPtr ProgramManager::getOrCreateKernel( context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc) { + KernelNameStrRefT KernelName, KernelNameBasedData &KernelNameBasedData, + const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl << ", " << &DeviceImpl << ", " << KernelName << ")\n"; @@ -1093,12 +1093,9 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( KernelProgramCache &Cache = ContextImpl.getKernelProgramCache(); ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); - FastKernelSubcacheT *CacheHintPtr = - KernelNameBasedCachePtr ? &KernelNameBasedCachePtr->FastKernelSubcache - : nullptr; if (SYCLConfig::get()) { - if (auto KernelCacheValPtr = - Cache.tryToGetKernelFast(KernelName, UrDevice, CacheHintPtr)) { + if (auto KernelCacheValPtr = Cache.tryToGetKernelFast( + KernelName, UrDevice, KernelNameBasedData.getKernelSubcache())) { return KernelCacheValPtr; } } @@ -1150,7 +1147,8 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( auto ret_val = std::make_shared( KernelArgMaskPair.first.retain(), &(BuildResult->MBuildResultMutex), KernelArgMaskPair.second, std::move(Program), ContextImpl.getAdapter()); - Cache.saveKernel(KernelName, UrDevice, ret_val, CacheHintPtr); + Cache.saveKernel(KernelName, UrDevice, ret_val, + KernelNameBasedData.getKernelSubcache()); return ret_val; } @@ -1814,24 +1812,18 @@ void ProgramManager::cacheKernelImplicitLocalArg( } } -std::optional ProgramManager::kernelImplicitLocalArgPos( - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const { - auto getLocalArgPos = [&]() -> std::optional { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; - }; +std::optional +ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { + auto it = m_KernelImplicitLocalArgPos.find(KernelName); + if (it != m_KernelImplicitLocalArgPos.end()) + return it->second; + return {}; +} - if (!KernelNameBasedCachePtr) - return getLocalArgPos(); - std::optional> &ImplicitLocalArgPos = - KernelNameBasedCachePtr->ImplicitLocalArgPos; - if (!ImplicitLocalArgPos.has_value()) { - ImplicitLocalArgPos = getLocalArgPos(); - } - return ImplicitLocalArgPos.value(); +KernelNameBasedData * +ProgramManager::getOrCreateKernelNameBasedData(KernelNameStrRefT KernelName) { + auto Result = m_KernelNameBasedDataMap.try_emplace(KernelName, KernelName); + return &Result.first->second; } static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, @@ -2152,55 +2144,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); - // Unmap the unique kernel IDs for the offload entries - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - EntriesIt = EntriesIt->Increment()) { - detail::KernelNameStrT Name = EntriesIt->GetName(); - // Drop entry for service kernel - if (Name.find("__sycl_service_kernel__") != std::string::npos) { - removeFromMultimapByVal(m_ServiceKernels, Name, Img); - continue; - } - - // Exported device functions won't have a kernel ID - if (m_ExportedSymbolImages.find(std::string(Name)) != - m_ExportedSymbolImages.end()) { - continue; - } - - auto Name2IDIt = m_KernelName2KernelIDs.find(Name); - if (Name2IDIt != m_KernelName2KernelIDs.end()) - removeFromMultimapByVal(m_KernelIDs2BinImage, Name2IDIt->second, Img); - - auto RefCountIt = m_KernelNameRefCount.find(Name); - assert(RefCountIt != m_KernelNameRefCount.end()); - int &RefCount = RefCountIt->second; - assert(RefCount > 0); - - // Remove everything associated with this KernelName if this is the last - // image referencing it. - if (--RefCount == 0) { - // TODO aggregate all these maps into a single one since their entries - // share lifetime. - m_KernelUsesAssert.erase(Name); - m_KernelImplicitLocalArgPos.erase(Name); - m_KernelNameRefCount.erase(RefCountIt); - if (Name2IDIt != m_KernelName2KernelIDs.end()) - m_KernelName2KernelIDs.erase(Name2IDIt); - } - } - - // Drop reverse mapping - m_BinImg2KernelIDs.erase(Img); - - // Unregister exported symbol -> Img pair (needs to happen after the ID - // unmap loop) - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - removeFromMultimapByVal(m_ExportedSymbolImages, ESProp->Name, Img, - /*AssertContains*/ false); - } - for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); @@ -2258,6 +2201,56 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { } } + // Unmap the unique kernel IDs for the offload entries + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + EntriesIt = EntriesIt->Increment()) { + detail::KernelNameStrT Name = EntriesIt->GetName(); + // Drop entry for service kernel + if (Name.find("__sycl_service_kernel__") != std::string::npos) { + removeFromMultimapByVal(m_ServiceKernels, Name, Img); + continue; + } + + // Exported device functions won't have a kernel ID + if (m_ExportedSymbolImages.find(std::string(Name)) != + m_ExportedSymbolImages.end()) { + continue; + } + + auto Name2IDIt = m_KernelName2KernelIDs.find(Name); + if (Name2IDIt != m_KernelName2KernelIDs.end()) + removeFromMultimapByVal(m_KernelIDs2BinImage, Name2IDIt->second, Img); + + auto RefCountIt = m_KernelNameRefCount.find(Name); + assert(RefCountIt != m_KernelNameRefCount.end()); + int &RefCount = RefCountIt->second; + assert(RefCount > 0); + + // Remove everything associated with this KernelName if this is the last + // image referencing it. + if (--RefCount == 0) { + // TODO aggregate all these maps into a single one since their entries + // share lifetime. + m_KernelUsesAssert.erase(Name); + m_KernelImplicitLocalArgPos.erase(Name); + m_KernelNameBasedDataMap.erase(Name); + m_KernelNameRefCount.erase(RefCountIt); + if (Name2IDIt != m_KernelName2KernelIDs.end()) + m_KernelName2KernelIDs.erase(Name2IDIt); + } + } + + // Drop reverse mapping + m_BinImg2KernelIDs.erase(Img); + + // Unregister exported symbol -> Img pair (needs to happen after the ID + // unmap loop) + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + removeFromMultimapByVal(m_ExportedSymbolImages, ESProp->Name, Img, + /*AssertContains*/ false); + } + m_DeviceImages.erase(DevImgIt); } } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index eaea458f95e8..0d47df758cdf 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -201,7 +201,7 @@ class ProgramManager { FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameBasedData &KernelNameBasedData, const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( @@ -367,23 +367,17 @@ class ProgramManager { ~ProgramManager() = default; template - bool kernelUsesAssert(const NameT &KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const { - if (!KernelNameBasedCachePtr) - return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - - std::optional &UsesAssert = KernelNameBasedCachePtr->UsesAssert; - if (!UsesAssert.has_value()) - UsesAssert = - m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - return UsesAssert.value(); + bool kernelUsesAssert(const NameT &KernelName) const { + return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); } SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - std::optional kernelImplicitLocalArgPos( - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const; + std::optional + kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; + + KernelNameBasedData * + getOrCreateKernelNameBasedData(KernelNameStrRefT KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -541,6 +535,11 @@ class ProgramManager { KernelUsesAssertSet m_KernelUsesAssert; std::unordered_map m_KernelImplicitLocalArgPos; + // Map for storing kernel name based caches. Runtime lookup should only be + // performed for ABI compatibility and user library unloading. + std::unordered_map + m_KernelNameBasedDataMap; + // Sanitizer type used in device image SanitizerType m_SanitizerFoundInImage; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index f4ed064543ce..9f1a88115057 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -871,9 +871,7 @@ class queue_impl : public std::enable_shared_from_this { // Kernel only uses assert if it's non interop one KernelUsesAssert = (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && - ProgramManager::getInstance().kernelUsesAssert( - Handler.MKernelName.data(), - Handler.impl->MKernelNameBasedCachePtr); + Handler.impl->MKernelNameBasedDataPtr->usesAssert(); auto &PostProcess = *PostProcessorFunc; PostProcess(IsKernel, KernelUsesAssert, Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d16f917cb94c..78a33b195088 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1988,8 +1988,7 @@ std::string instrumentationGetKernelName( void instrumentationAddExtraKernelMetadata( xpti_td *&CmdTraceEvent, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameStrRefT KernelName, KernelNameBasedData *KernelNameBasedDataPtr, const std::shared_ptr &SyclKernel, queue_impl *Queue, std::vector &CGArgs) // CGArgs are not const since they could be // sorted in this function @@ -2013,10 +2012,11 @@ void instrumentationAddExtraKernelMetadata( // NOTE: Queue can be null when kernel is directly enqueued to a command // buffer // by graph API, when a modifiable graph is finalized. + assert(KernelNameBasedDataPtr); FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImpl(), Queue->getDeviceImpl(), KernelName, - KernelNameBasedCachePtr); + *KernelNameBasedDataPtr); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2104,7 +2104,7 @@ std::pair emitKernelInstrumentationData( const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, const std::string_view SyclKernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, queue_impl *Queue, + KernelNameBasedData *KernelNameBasedDataPtr, queue_impl *Queue, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs) { @@ -2143,7 +2143,7 @@ std::pair emitKernelInstrumentationData( getQueueID(Queue)); instrumentationAddExtraKernelMetadata( CmdTraceEvent, NDRDesc, KernelBundleImplPtr, - std::string(SyclKernelName), KernelNameBasedCachePtr, SyclKernel, Queue, + std::string(SyclKernelName), KernelNameBasedDataPtr, SyclKernel, Queue, CGArgs); xptiNotifySubscribers( @@ -2199,7 +2199,7 @@ void ExecCGCommand::emitInstrumentationData() { reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( CmdTraceEvent, KernelCG->MNDRDesc, KernelCG->getKernelBundle().get(), - KernelCG->MKernelName, KernelCG->MKernelNameBasedCachePtr, + KernelCG->MKernelName, KernelCG->MKernelNameBasedDataPtr, KernelCG->MSyclKernel, MQueue.get(), KernelCG->MArgs); } @@ -2401,8 +2401,7 @@ static ur_result_t SetKernelParamsAndLaunch( const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameStrRefT KernelName, KernelNameBasedData *KernelNameBasedDataPtr, void *KernelFuncPtr = nullptr, int KernelNumArgs = 0, detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, bool KernelHasSpecialCaptures = true) { @@ -2448,9 +2447,8 @@ static ur_result_t SetKernelParamsAndLaunch( applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } - std::optional ImplicitLocalArg = - ProgramManager::getInstance().kernelImplicitLocalArgPos( - KernelName, KernelNameBasedCachePtr); + const std::optional &ImplicitLocalArg = + KernelNameBasedDataPtr->getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, @@ -2551,10 +2549,11 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { + assert(CommandGroup.MKernelNameBasedDataPtr); FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName, - CommandGroup.MKernelNameBasedCachePtr); + *CommandGroup.MKernelNameBasedDataPtr); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; // To keep UrKernel valid, we return FastKernelCacheValPtr. @@ -2669,7 +2668,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameBasedData *KernelNameBasedDataPtr, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, @@ -2715,8 +2714,9 @@ void enqueueImpKernel( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); KernelMutex = SyclKernelImpl->getCacheMutex(); } else { + assert(KernelNameBasedDataPtr); KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); + ContextImpl, DeviceImpl, KernelName, *KernelNameBasedDataPtr, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -2763,7 +2763,7 @@ void enqueueImpKernel( Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, OutEventImpl, EliminatedArgMask, getMemAllocationFunc, KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, - BinImage, KernelName, KernelNameBasedCachePtr, KernelFuncPtr, + BinImage, KernelName, KernelNameBasedDataPtr, KernelFuncPtr, KernelNumArgs, KernelParamDescGetter, KernelHasSpecialCaptures); } if (UR_RESULT_SUCCESS != Error) { @@ -3242,10 +3242,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (!EventImpl) { // Kernel only uses assert if it's non interop one - bool KernelUsesAssert = - (!SyclKernel || SyclKernel->hasSYCLMetadata()) && - ProgramManager::getInstance().kernelUsesAssert( - KernelName, ExecKernel->MKernelNameBasedCachePtr); + bool KernelUsesAssert = (!SyclKernel || SyclKernel->hasSYCLMetadata()) && + ExecKernel->MKernelNameBasedDataPtr->usesAssert(); if (KernelUsesAssert) { EventImpl = MEvent.get(); } @@ -3258,7 +3256,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } enqueueImpKernel( *MQueue, NDRDesc, Args, ExecKernel->getKernelBundle().get(), - SyclKernel.get(), KernelName, ExecKernel->MKernelNameBasedCachePtr, + SyclKernel.get(), KernelName, ExecKernel->MKernelNameBasedDataPtr, RawEvents, EventImpl, getMemAllocationFunc, ExecKernel->MKernelCacheConfig, ExecKernel->MKernelIsCooperative, ExecKernel->MKernelUsesClusterLaunch, diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 0fa9835ae134..cffd7d78d861 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -628,7 +628,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameBasedData *KernelNameBasedDataPtr, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, @@ -695,7 +695,7 @@ std::pair emitKernelInstrumentationData( const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, std::string_view SyclKernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, queue_impl *Queue, + KernelNameBasedData *KernelNameBasedDataPtr, queue_impl *Queue, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs); #endif diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f9d1769e573e..f054f8b8cc4e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -538,6 +538,18 @@ event handler::finalize() { } if (type == detail::CGType::Kernel) { + if (impl->MKernelNameBasedDataPtr) { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + impl->MKernelNameBasedDataPtr->initIfNeeded( + toKernelNameStrT(MKernelName)); +#endif + } else { + // Fetch the kernel name based data pointer if it hasn't been set (e.g. + // in kernel bundle or free function cases). + impl->MKernelNameBasedDataPtr = + detail::ProgramManager::getInstance().getOrCreateKernelNameBasedData( + toKernelNameStrT(MKernelName)); + } // If there were uses of set_specialization_constant build the kernel_bundle detail::kernel_bundle_impl *KernelBundleImpPtr = getOrInsertHandlerKernelBundlePtr(/*Insert=*/false); @@ -611,10 +623,8 @@ event handler::finalize() { !impl->MEventNeeded && impl->get_queue().supportsDiscardingPiEvents(); if (DiscardEvent) { // Kernel only uses assert if it's non interop one - bool KernelUsesAssert = - !(MKernel && MKernel->isInterop()) && - detail::ProgramManager::getInstance().kernelUsesAssert( - toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr); + bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && + impl->MKernelNameBasedDataPtr->usesAssert(); DiscardEvent = !KernelUsesAssert; } @@ -635,7 +645,7 @@ event handler::finalize() { StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, - MKernelName.data(), impl->MKernelNameBasedCachePtr, + MKernelName.data(), impl->MKernelNameBasedDataPtr, impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); detail::emitInstrumentationGeneral(StreamID, InstanceID, @@ -652,7 +662,7 @@ event handler::finalize() { enqueueImpKernel( impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), - impl->MKernelNameBasedCachePtr, RawEvents, ResultEvent.get(), + impl->MKernelNameBasedDataPtr, RawEvents, ResultEvent.get(), nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, @@ -713,7 +723,7 @@ event handler::finalize() { impl->MNDRDesc, std::move(MHostKernel), std::move(MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), toKernelNameStrT(MKernelName), - impl->MKernelNameBasedCachePtr, std::move(MStreamStorage), + impl->MKernelNameBasedDataPtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, @@ -2594,9 +2604,18 @@ void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelNameBasedCachePtr( sycl::detail::KernelNameBasedCacheT *KernelNameBasedCachePtr) { - impl->MKernelNameBasedCachePtr = KernelNameBasedCachePtr; + setKernelNameBasedDataPtr( + reinterpret_cast( + KernelNameBasedCachePtr)); +} +#endif + +void handler::setKernelNameBasedDataPtr( + sycl::detail::KernelNameBasedData *KernelNameBasedDataPtr) { + impl->MKernelNameBasedDataPtr = KernelNameBasedDataPtr; } void handler::setKernelInfo( diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f41c07ee394b..a9bb5003f977 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3334,6 +3334,7 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE +_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKNS0_4spanIcLm18446744073709551615EEENS0_12bundle_stateE @@ -3346,6 +3347,7 @@ _ZN4sycl3_V16detail22removeDuplicateDevicesERKSt6vectorINS0_6deviceESaIS3_EE _ZN4sycl3_V16detail23constructorNotificationEPvS2_NS0_6access6targetENS3_4modeERKNS1_13code_locationE _ZN4sycl3_V16detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE _ZN4sycl3_V16detail26createKernelNameBasedCacheEv +_ZN4sycl3_V16detail26getKernelNameBasedDataImplERKNS1_6stringE _ZN4sycl3_V16detail26isDeviceGlobalUsedInKernelEPKv _ZN4sycl3_V16detail27getPixelCoordLinearFiltModeENS0_3vecIfLi4EEENS0_15addressing_modeENS0_5rangeILi3EEERS3_ _ZN4sycl3_V16detail28SampledImageAccessorBaseHost10getAccDataEv @@ -3605,6 +3607,7 @@ _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler25setKernelNameBasedDataPtrEPNS0_6detail19KernelNameBasedDataE _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi @@ -3850,7 +3853,6 @@ _ZNK4sycl3_V16detail19kernel_bundle_plain5emptyEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost6getPtrEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv -_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost10getSamplerEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost12getNumOfDimsEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 8d6235784776..9d01a5a6722c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3841,7 +3841,7 @@ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z ?copyCodeLoc@handler@_V1@sycl@@AEAAXAEBV123@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAUKernelNameBasedCacheT@123@XZ +?createKernelNameBasedData@detail@_V1@sycl@@YAPEAUKernelNameBasedData@123@XZ ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@PEAX_KAEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4417,7 +4417,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z +?setKernelNameBasedDataPtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedData@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 3087928f17c3..64b481940fa4 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -130,6 +130,8 @@ // CHECK-NEXT: CL/cl_version.h // CHECK-NEXT: CL/cl_platform.h // CHECK-NEXT: CL/cl_ext.h +// CHECK-NEXT: detail/get_kernel_name_based_data.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp @@ -138,8 +140,6 @@ // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/kernel_name_based_cache.hpp -// CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/reduction_forward.hpp // CHECK-NEXT: detail/ur.hpp // CHECK-NEXT: ur_api_funcs.def diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 27f860fb2565..6052dbc617fc 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -148,7 +148,7 @@ class MockHandler : public sycl::handler { std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), - CGH->MKernelName.data(), impl->MKernelNameBasedCachePtr, + CGH->MKernelName.data(), impl->MKernelNameBasedDataPtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), impl->MCGType, {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 30966b26e742..d47ea0ff95b2 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -303,7 +303,7 @@ class MockHandlerCustomFinalize : public MockHandler { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), + getKernelName(), impl->MKernelNameBasedDataPtr, getStreamStorage(), impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index d038004b1e1e..28702049b602 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -33,7 +33,7 @@ class MockHandlerStreamInit : public MockHandler { detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), - getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, + getArgs(), getKernelName(), impl->MKernelNameBasedDataPtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); From 7fde713d648dd70a04051c20bfd0eef88e6773a0 Mon Sep 17 00:00:00 2001 From: Semenov Date: Tue, 19 Aug 2025 07:47:47 -0700 Subject: [PATCH 02/25] Update Windows ABI --- sycl/test/abi/sycl_symbols_windows.dump | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 9d01a5a6722c..28db6c2cd4cf 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -229,9 +229,9 @@ ??$get_info_impl@Unative_vector_width_int@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_long@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_short@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_args@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ -??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uopencl_c_version@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uparent_device@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Upartition_affinity_domains@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4partition_affinity_domain@info@_V1@sycl@@V?$allocator@W4partition_affinity_domain@info@_V1@sycl@@@std@@@std@@XZ @@ -3841,7 +3841,7 @@ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z ?copyCodeLoc@handler@_V1@sycl@@AEAAXAEBV123@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?createKernelNameBasedData@detail@_V1@sycl@@YAPEAUKernelNameBasedData@123@XZ +?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAVKernelNameBasedCacheT@123@XZ ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@PEAX_KAEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4073,6 +4073,7 @@ ?getEndTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ ?getKernelBundle@handler@_V1@sycl@@AEBA?AV?$kernel_bundle@$0A@@23@XZ ?getKernelName@handler@_V1@sycl@@AEAA?AVstring@detail@23@XZ +?getKernelNameBasedDataImpl@detail@_V1@sycl@@YAPEAVKernelNameBasedData@123@AEBVstring@123@@Z ?getMaxWorkGroups@handler@_V1@sycl@@AEAA?AV?$optional@V?$array@_K$02@std@@@std@@XZ ?getMaxWorkGroups_v2@handler@_V1@sycl@@AEAA?AV?$tuple@V?$array@_K$02@std@@_N@std@@XZ ?getMemoryObject@AccessorBaseHost@detail@_V1@sycl@@QEBAPEAXXZ @@ -4188,8 +4189,8 @@ ?get_impl@handler@_V1@sycl@@AEAAPEAVhandler_impl@detail@23@XZ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ -?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z @@ -4417,7 +4418,8 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelNameBasedDataPtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedData@detail@23@@Z +?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAVKernelNameBasedCacheT@detail@23@@Z +?setKernelNameBasedDataPtr@handler@_V1@sycl@@AEAAXPEAVKernelNameBasedData@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z @@ -4429,8 +4431,8 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ From b38a467c47ff38e93696791e1e5cd679c35fc610 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 19 Aug 2025 08:31:21 -0700 Subject: [PATCH 03/25] Assert data presence in CG --- sycl/source/detail/cg.hpp | 7 ++--- sycl/source/detail/graph/graph_impl.cpp | 5 ++- sycl/source/detail/scheduler/commands.cpp | 31 +++++++++---------- sycl/source/detail/scheduler/commands.hpp | 8 ++--- sycl/source/handler.cpp | 6 ++-- .../arg_mask/EliminatedArgMask.cpp | 2 +- .../scheduler/SchedulerTestUtils.hpp | 2 +- .../scheduler/StreamInitDependencyOnHost.cpp | 2 +- 8 files changed, 29 insertions(+), 34 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 29ed9213c315..1c475c134073 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -254,7 +254,7 @@ class CGExecKernel : public CG { std::shared_ptr MKernelBundle; std::vector MArgs; KernelNameStrT MKernelName; - KernelNameBasedData *MKernelNameBasedDataPtr; + KernelNameBasedData &MKernelNameBasedData; std::vector> MStreams; std::vector> MAuxiliaryResources; /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list @@ -270,7 +270,7 @@ class CGExecKernel : public CG { std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, KernelNameStrT KernelName, - KernelNameBasedData *KernelNameBasedDataPtr, + KernelNameBasedData &KernelNameBasedData, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -280,8 +280,7 @@ class CGExecKernel : public CG { MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), MKernelName(std::move(KernelName)), - MKernelNameBasedDataPtr(KernelNameBasedDataPtr), - MStreams(std::move(Streams)), + MKernelNameBasedData(KernelNameBasedData), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 52dd77067ebb..79c699023f66 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -741,7 +741,7 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( CGExec->MLine, CGExec->MColumn); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, - CGExec->MKernelName.data(), CGExec->MKernelNameBasedDataPtr, nullptr, + CGExec->MKernelName.data(), CGExec->MKernelNameBasedData, nullptr, CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs); if (CmdTraceEvent) sycl::detail::emitInstrumentationGeneral( @@ -1573,10 +1573,9 @@ void exec_graph_impl::populateURKernelUpdateStructs( UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { - assert(ExecCG.MKernelNameBasedDataPtr); BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, ExecCG.MKernelName, - *ExecCG.MKernelNameBasedDataPtr); + ExecCG.MKernelNameBasedData); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 78a33b195088..85d3d80b3615 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1988,7 +1988,7 @@ std::string instrumentationGetKernelName( void instrumentationAddExtraKernelMetadata( xpti_td *&CmdTraceEvent, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, - KernelNameStrRefT KernelName, KernelNameBasedData *KernelNameBasedDataPtr, + KernelNameStrRefT KernelName, KernelNameBasedData &KernelNameBasedData, const std::shared_ptr &SyclKernel, queue_impl *Queue, std::vector &CGArgs) // CGArgs are not const since they could be // sorted in this function @@ -2012,11 +2012,10 @@ void instrumentationAddExtraKernelMetadata( // NOTE: Queue can be null when kernel is directly enqueued to a command // buffer // by graph API, when a modifiable graph is finalized. - assert(KernelNameBasedDataPtr); FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImpl(), Queue->getDeviceImpl(), KernelName, - *KernelNameBasedDataPtr); + KernelNameBasedData); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2104,7 +2103,7 @@ std::pair emitKernelInstrumentationData( const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, const std::string_view SyclKernelName, - KernelNameBasedData *KernelNameBasedDataPtr, queue_impl *Queue, + KernelNameBasedData &KernelNameBasedData, queue_impl *Queue, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs) { @@ -2143,7 +2142,7 @@ std::pair emitKernelInstrumentationData( getQueueID(Queue)); instrumentationAddExtraKernelMetadata( CmdTraceEvent, NDRDesc, KernelBundleImplPtr, - std::string(SyclKernelName), KernelNameBasedDataPtr, SyclKernel, Queue, + std::string(SyclKernelName), KernelNameBasedData, SyclKernel, Queue, CGArgs); xptiNotifySubscribers( @@ -2199,7 +2198,7 @@ void ExecCGCommand::emitInstrumentationData() { reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( CmdTraceEvent, KernelCG->MNDRDesc, KernelCG->getKernelBundle().get(), - KernelCG->MKernelName, KernelCG->MKernelNameBasedDataPtr, + KernelCG->MKernelName, KernelCG->MKernelNameBasedData, KernelCG->MSyclKernel, MQueue.get(), KernelCG->MArgs); } @@ -2401,7 +2400,7 @@ static ur_result_t SetKernelParamsAndLaunch( const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, - KernelNameStrRefT KernelName, KernelNameBasedData *KernelNameBasedDataPtr, + KernelNameStrRefT KernelName, KernelNameBasedData &KernelNameBasedData, void *KernelFuncPtr = nullptr, int KernelNumArgs = 0, detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, bool KernelHasSpecialCaptures = true) { @@ -2448,7 +2447,7 @@ static ur_result_t SetKernelParamsAndLaunch( } const std::optional &ImplicitLocalArg = - KernelNameBasedDataPtr->getImplicitLocalArgPos(); + KernelNameBasedData.getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, @@ -2549,11 +2548,10 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { - assert(CommandGroup.MKernelNameBasedDataPtr); FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName, - *CommandGroup.MKernelNameBasedDataPtr); + CommandGroup.MKernelNameBasedData); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; // To keep UrKernel valid, we return FastKernelCacheValPtr. @@ -2668,7 +2666,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedData *KernelNameBasedDataPtr, + KernelNameBasedData &KernelNameBasedData, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, @@ -2714,9 +2712,8 @@ void enqueueImpKernel( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); KernelMutex = SyclKernelImpl->getCacheMutex(); } else { - assert(KernelNameBasedDataPtr); KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, KernelName, *KernelNameBasedDataPtr, NDRDesc); + ContextImpl, DeviceImpl, KernelName, KernelNameBasedData, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -2763,8 +2760,8 @@ void enqueueImpKernel( Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, OutEventImpl, EliminatedArgMask, getMemAllocationFunc, KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, - BinImage, KernelName, KernelNameBasedDataPtr, KernelFuncPtr, - KernelNumArgs, KernelParamDescGetter, KernelHasSpecialCaptures); + BinImage, KernelName, KernelNameBasedData, KernelFuncPtr, KernelNumArgs, + KernelParamDescGetter, KernelHasSpecialCaptures); } if (UR_RESULT_SUCCESS != Error) { // If we have got non-success error code, let's analyze it to emit nice @@ -3243,7 +3240,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (!EventImpl) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = (!SyclKernel || SyclKernel->hasSYCLMetadata()) && - ExecKernel->MKernelNameBasedDataPtr->usesAssert(); + ExecKernel->MKernelNameBasedData.usesAssert(); if (KernelUsesAssert) { EventImpl = MEvent.get(); } @@ -3256,7 +3253,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } enqueueImpKernel( *MQueue, NDRDesc, Args, ExecKernel->getKernelBundle().get(), - SyclKernel.get(), KernelName, ExecKernel->MKernelNameBasedDataPtr, + SyclKernel.get(), KernelName, ExecKernel->MKernelNameBasedData, RawEvents, EventImpl, getMemAllocationFunc, ExecKernel->MKernelCacheConfig, ExecKernel->MKernelIsCooperative, ExecKernel->MKernelUsesClusterLaunch, diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index cffd7d78d861..8fc7456e59b8 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -628,7 +628,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedData *KernelNameBasedDataPtr, + KernelNameBasedData &KernelNameBasedData, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, @@ -694,9 +694,9 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - std::string_view SyclKernelName, - KernelNameBasedData *KernelNameBasedDataPtr, queue_impl *Queue, - const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, + std::string_view SyclKernelName, KernelNameBasedData &KernelNameBasedData, + queue_impl *Queue, const NDRDescT &NDRDesc, + detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs); #endif diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f054f8b8cc4e..1699940c6f47 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -645,7 +645,7 @@ event handler::finalize() { StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, - MKernelName.data(), impl->MKernelNameBasedDataPtr, + MKernelName.data(), *impl->MKernelNameBasedDataPtr, impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); detail::emitInstrumentationGeneral(StreamID, InstanceID, @@ -662,7 +662,7 @@ event handler::finalize() { enqueueImpKernel( impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), - impl->MKernelNameBasedDataPtr, RawEvents, ResultEvent.get(), + *impl->MKernelNameBasedDataPtr, RawEvents, ResultEvent.get(), nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, @@ -723,7 +723,7 @@ event handler::finalize() { impl->MNDRDesc, std::move(MHostKernel), std::move(MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), toKernelNameStrT(MKernelName), - impl->MKernelNameBasedDataPtr, std::move(MStreamStorage), + *impl->MKernelNameBasedDataPtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 6052dbc617fc..ffdb73d10766 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -148,7 +148,7 @@ class MockHandler : public sycl::handler { std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), - CGH->MKernelName.data(), impl->MKernelNameBasedDataPtr, + CGH->MKernelName.data(), *impl->MKernelNameBasedDataPtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), impl->MCGType, {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index d47ea0ff95b2..3704bb3822c7 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -303,7 +303,7 @@ class MockHandlerCustomFinalize : public MockHandler { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), impl->MKernelNameBasedDataPtr, getStreamStorage(), + getKernelName(), *impl->MKernelNameBasedDataPtr, getStreamStorage(), impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 28702049b602..0f928378425a 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -33,7 +33,7 @@ class MockHandlerStreamInit : public MockHandler { detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), - getArgs(), getKernelName(), impl->MKernelNameBasedDataPtr, + getArgs(), getKernelName(), *impl->MKernelNameBasedDataPtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); From 8e41e7a71daf6dce90b065dbdfe15436bbcaaa95 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 19 Aug 2025 13:09:37 -0700 Subject: [PATCH 04/25] Misc changes --- sycl/source/detail/global_handler.cpp | 4 ++-- sycl/source/detail/handler_impl.hpp | 3 ++- sycl/source/detail/kernel_name_based_data.cpp | 2 +- sycl/source/detail/kernel_name_based_data.hpp | 3 ++- sycl/source/detail/kernel_program_cache.hpp | 2 +- sycl/source/detail/program_manager/program_manager.hpp | 4 ++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 47f6cba89d8c..c8868f89fdd1 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -394,8 +394,8 @@ void shutdown_late() { Handler->MProgramManager.Inst.reset(nullptr); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - // Cache stores handles to the adapter, so clear it before - // releasing adapters. + // Kernel cache, which is part of kernel name based data, + // stores handles to the adapter, so clear it before releasing adapters. Handler->MKernelNameBasedDataStorage.Inst.reset(nullptr); #endif diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index b5b718ea89cb..1b4c1bd94d0a 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -243,7 +243,8 @@ class handler_impl { bool MKernelIsESIMD = false; bool MKernelHasSpecialCaptures = true; - // A pointer to a kernel name based cache retrieved on the application side. + // A pointer to kernel name based data. Cached on the application side in + // headers or retrieved from program manager. KernelNameBasedData *MKernelNameBasedDataPtr = nullptr; }; diff --git a/sycl/source/detail/kernel_name_based_data.cpp b/sycl/source/detail/kernel_name_based_data.cpp index a525d9d7969e..6ac757b20b97 100644 --- a/sycl/source/detail/kernel_name_based_data.cpp +++ b/sycl/source/detail/kernel_name_based_data.cpp @@ -47,7 +47,7 @@ const std::optional &KernelNameBasedData::getImplicitLocalArgPos() { void KernelNameBasedData::assertInitialized() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - assert(MInitialized.load() && "Cache needs to be initialized before use"); + assert(MInitialized.load() && "Data needs to be initialized before use"); #endif } diff --git a/sycl/source/detail/kernel_name_based_data.hpp b/sycl/source/detail/kernel_name_based_data.hpp index 08c4a89caef3..a9aa0abc029b 100644 --- a/sycl/source/detail/kernel_name_based_data.hpp +++ b/sycl/source/detail/kernel_name_based_data.hpp @@ -72,6 +72,8 @@ struct FastKernelEntryT { using FastKernelSubcacheEntriesT = std::vector; +// Structure for caching built kernels with a specific name. +// Used by instances of the kernel program cache class (potentially multiple). struct FastKernelSubcacheT { FastKernelSubcacheEntriesT Entries; FastKernelSubcacheMutexT Mutex; @@ -80,7 +82,6 @@ struct FastKernelSubcacheT { // This class is used for aggregating kernel name based information. // Pointers to instances of this class are stored in header function templates // as a static variable to avoid repeated runtime lookup overhead. - // TODO Currently this class duplicates information fetched from the program // manager. Instead, we should merge all of the kernel name based information // into this structure and get rid of the other KernelName -> * maps. diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 1d5e45aa52a4..34a6acd3753b 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -255,7 +255,7 @@ class KernelProgramCache { FastKernelSubcacheT &get() { return *MSubcachePtr; } private: - FastKernelSubcacheT *MSubcachePtr; + FastKernelSubcacheT *MSubcachePtr = nullptr; ur_context_handle_t MUrContext = nullptr; }; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 0d47df758cdf..42cfaa2c6fdf 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -535,8 +535,8 @@ class ProgramManager { KernelUsesAssertSet m_KernelUsesAssert; std::unordered_map m_KernelImplicitLocalArgPos; - // Map for storing kernel name based caches. Runtime lookup should only be - // performed for ABI compatibility and user library unloading. + // Map for storing kernel name based data. Runtime lookup should be avoided + // by caching the pointers when possible. std::unordered_map m_KernelNameBasedDataMap; From 272bc610bd1d1c0f302de65075d276ec53d7d889 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 19 Aug 2025 13:27:01 -0700 Subject: [PATCH 05/25] Extend cleanup tests coverage --- sycl/unittests/program_manager/Cleanup.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 4a39f06ee525..de33de9ef99d 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -61,6 +61,12 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return NativePrograms; } + std::unordered_map & + getKernelNameBasedDataMap() { + return m_KernelNameBasedDataMap; + } + std::unordered_map & getKernelNameRefCount() { return m_KernelNameRefCount; @@ -307,6 +313,9 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, checkContainer(PM.getVFSet2BinImage(), ExpectedEntryCount, generateRefNames(ImgIds, "VF"), "VFSet2BinImage " + CommentPostfix); + checkContainer(PM.getKernelNameBasedDataMap(), ExpectedEntryCount, + generateRefNames(ImgIds, "Kernel"), + "Kernel name based data map " + CommentPostfix); checkContainer(PM.getKernelNameRefCount(), ExpectedEntryCount, generateRefNames(ImgIds, "Kernel"), "Kernel name reference count " + CommentPostfix); @@ -366,6 +375,10 @@ TEST(ImageRemoval, BaseContainers) { generateRefName("B", "HostPipe").c_str()); PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(), generateRefName("C", "HostPipe").c_str()); + std::vector KernelNames = + generateRefNames({"A", "B", "C"}, "Kernel"); + for (const std::string &Name : KernelNames) + PM.getOrCreateKernelNameBasedData(Name); checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), {"A", "B", "C"}, "check failed before removal"); @@ -389,6 +402,8 @@ TEST(ImageRemoval, MultipleImagesPerEntry) { convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval, TestBinaries); + std::string KernelName = generateRefName("A", "Kernel"); + PM.getOrCreateKernelNameBasedData(KernelName); checkAllInvolvedContainers( PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(), /*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal", From 57553b764b042d4a95041c3c4227aaa398639674 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 11:28:15 -0700 Subject: [PATCH 06/25] Add `CompileTimeKernelInfoTy` and unimplemented `getDeviceKernelInfo` --- sycl/include/sycl/detail/kernel_desc.hpp | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index 09d294d1b2d9..9062d02197a8 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -277,6 +277,47 @@ template constexpr bool hasSpecialCaptures() { } return FoundSpecialCapture; } +inline namespace compile_time_kernel_info_v1 { + +// This is being passed across ABI boundary, so we don't use std::string_view, +// at least for as long as we support user apps built with GNU libstdc++'s +// pre-C++11 ABI. +struct CompileTimeKernelInfoTy { + const char *Name = nullptr; + unsigned NumParams = 0; + bool IsESIMD = false; + const char *FileName = ""; + const char *FunctionName = ""; + unsigned LineNumber = 0; + unsigned ColumnNumber = 0; + int64_t KernelSize; + kernel_param_desc_t &(*ParamDescGetter)(int) = nullptr; + bool HasSpecialCaptures = false; +}; + +template +inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{ + getKernelName(), getKernelNumParams(), + isKernelESIMD(), getKernelFileName(), + getKernelFunctionName(), getKernelLineNumber(), + getKernelColumnNumber(), getKernelSize(), + &getKernelParamDesc, hasSpecialCaptures()}; +} // namespace compile_time_kernel_info_v1 + +class DeviceKernelInfo; +// Lifetime of the underlying `DeviceKernelInfo` is tied to the availability of +// the `sycl_device_binaries` corresponding to this kernel. In other words, once +// user library is unloaded (see __sycl_unregister_lib), program manager destoys +// this `DeviceKernelInfo` object and the reference returned from here becomes +// stale. +__SYCL_EXPORT DeviceKernelInfo & +getDeviceKernelInfo(const CompileTimeKernelInfoTy &); + +template DeviceKernelInfo &getDeviceKernelInfo() { + static DeviceKernelInfo &Info = + getDeviceKernelInfo(CompileTimeKernelInfo); + return Info; +} } // namespace detail } // namespace _V1 From abd48ae9d3e16f830217fc3f011a681e2483a5eb Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 11:31:33 -0700 Subject: [PATCH 07/25] Rename KernelNameBasedData -> DeviceKernelInfo --- .../sycl/detail/get_kernel_name_based_data.hpp | 8 ++++---- sycl/include/sycl/handler.hpp | 2 +- sycl/source/detail/cg.hpp | 6 +++--- .../detail/get_kernel_name_based_data.cpp | 2 +- sycl/source/detail/global_handler.cpp | 2 +- sycl/source/detail/global_handler.hpp | 4 ++-- sycl/source/detail/handler_impl.hpp | 2 +- sycl/source/detail/kernel_name_based_data.cpp | 14 +++++++------- sycl/source/detail/kernel_name_based_data.hpp | 6 +++--- .../detail/program_manager/program_manager.cpp | 8 ++++---- .../detail/program_manager/program_manager.hpp | 6 +++--- sycl/source/detail/scheduler/commands.cpp | 18 +++++++++--------- sycl/source/detail/scheduler/commands.hpp | 4 ++-- sycl/source/handler.cpp | 4 ++-- sycl/unittests/program_manager/Cleanup.cpp | 2 +- 15 files changed, 44 insertions(+), 44 deletions(-) diff --git a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp index f13dc15a327e..6a38b134b695 100644 --- a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp +++ b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp @@ -19,17 +19,17 @@ class KernelNameBasedCacheT; __SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); #endif -class KernelNameBasedData; +class DeviceKernelInfo; -__SYCL_EXPORT KernelNameBasedData * +__SYCL_EXPORT DeviceKernelInfo * getKernelNameBasedDataImpl(detail::ABINeutralKernelNameStrRefT KernelName); // Retrieves and caches a data pointer to avoid kernel name based lookup // overhead. template -KernelNameBasedData * +DeviceKernelInfo * getKernelNameBasedData(detail::ABINeutralKernelNameStrRefT KernelName) { - static KernelNameBasedData *Instance = getKernelNameBasedDataImpl(KernelName); + static DeviceKernelInfo *Instance = getKernelNameBasedDataImpl(KernelName); return Instance; } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 909a958ff181..c5945fc00a99 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3691,7 +3691,7 @@ class __SYCL_EXPORT handler { detail::KernelNameBasedCacheT *KernelNameBasedCachePtr); #endif void setKernelNameBasedDataPtr( - detail::KernelNameBasedData *KernelNameBasedDataPtr); + detail::DeviceKernelInfo *KernelNameBasedDataPtr); queue getQueue(); diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 1c475c134073..c92f201699c1 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -254,7 +254,7 @@ class CGExecKernel : public CG { std::shared_ptr MKernelBundle; std::vector MArgs; KernelNameStrT MKernelName; - KernelNameBasedData &MKernelNameBasedData; + DeviceKernelInfo &MKernelNameBasedData; std::vector> MStreams; std::vector> MAuxiliaryResources; /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list @@ -270,7 +270,7 @@ class CGExecKernel : public CG { std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, KernelNameStrT KernelName, - KernelNameBasedData &KernelNameBasedData, + DeviceKernelInfo &DeviceKernelInfo, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -280,7 +280,7 @@ class CGExecKernel : public CG { MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), MKernelName(std::move(KernelName)), - MKernelNameBasedData(KernelNameBasedData), MStreams(std::move(Streams)), + MKernelNameBasedData(DeviceKernelInfo), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), diff --git a/sycl/source/detail/get_kernel_name_based_data.cpp b/sycl/source/detail/get_kernel_name_based_data.cpp index 5e8aa6c3f540..9c299f6ac299 100644 --- a/sycl/source/detail/get_kernel_name_based_data.cpp +++ b/sycl/source/detail/get_kernel_name_based_data.cpp @@ -20,7 +20,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() { } #endif -KernelNameBasedData * +DeviceKernelInfo * getKernelNameBasedDataImpl(detail::ABINeutralKernelNameStrRefT KernelName) { return ProgramManager::getInstance().getOrCreateKernelNameBasedData( KernelName.data()); diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index c8868f89fdd1..61fc5d8a8914 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -251,7 +251,7 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() { - static std::deque &KernelNameBasedDataStorage = + static std::deque &KernelNameBasedDataStorage = getOrCreate(MKernelNameBasedDataStorage); LockGuard LG{MKernelNameBasedDataStorage.Lock}; return reinterpret_cast( diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 9d330007d3eb..aea730c5afe4 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -31,7 +31,7 @@ class XPTIRegistry; class ThreadPool; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES class KernelNameBasedCacheT; -class KernelNameBasedData; +class DeviceKernelInfo; #endif /// Wrapper class for global data structures with non-trivial destructors. @@ -138,7 +138,7 @@ class GlobalHandler { // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - InstWithLock> MKernelNameBasedDataStorage; + InstWithLock> MKernelNameBasedDataStorage; #endif }; } // namespace detail diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 1b4c1bd94d0a..b76cf0778be8 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -245,7 +245,7 @@ class handler_impl { // A pointer to kernel name based data. Cached on the application side in // headers or retrieved from program manager. - KernelNameBasedData *MKernelNameBasedDataPtr = nullptr; + DeviceKernelInfo *MKernelNameBasedDataPtr = nullptr; }; } // namespace detail diff --git a/sycl/source/detail/kernel_name_based_data.cpp b/sycl/source/detail/kernel_name_based_data.cpp index 6ac757b20b97..4ec499403f66 100644 --- a/sycl/source/detail/kernel_name_based_data.cpp +++ b/sycl/source/detail/kernel_name_based_data.cpp @@ -12,11 +12,11 @@ namespace sycl { inline namespace _V1 { namespace detail { -KernelNameBasedData::KernelNameBasedData(KernelNameStrRefT KernelName) { +DeviceKernelInfo::DeviceKernelInfo(KernelNameStrRefT KernelName) { init(KernelName); } -void KernelNameBasedData::init(KernelNameStrRefT KernelName) { +void DeviceKernelInfo::init(KernelNameStrRefT KernelName) { auto &PM = detail::ProgramManager::getInstance(); MUsesAssert = PM.kernelUsesAssert(KernelName); MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); @@ -26,26 +26,26 @@ void KernelNameBasedData::init(KernelNameStrRefT KernelName) { } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES -void KernelNameBasedData::initIfNeeded(KernelNameStrRefT KernelName) { +void DeviceKernelInfo::initIfNeeded(KernelNameStrRefT KernelName) { if (!MInitialized.load()) init(KernelName); } #endif -FastKernelSubcacheT &KernelNameBasedData::getKernelSubcache() { +FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() { assertInitialized(); return MFastKernelSubcache; } -bool KernelNameBasedData::usesAssert() { +bool DeviceKernelInfo::usesAssert() { assertInitialized(); return MUsesAssert; } -const std::optional &KernelNameBasedData::getImplicitLocalArgPos() { +const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() { assertInitialized(); return MImplicitLocalArgPos; } -void KernelNameBasedData::assertInitialized() { +void DeviceKernelInfo::assertInitialized() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES assert(MInitialized.load() && "Data needs to be initialized before use"); #endif diff --git a/sycl/source/detail/kernel_name_based_data.hpp b/sycl/source/detail/kernel_name_based_data.hpp index a9aa0abc029b..0081bae3929e 100644 --- a/sycl/source/detail/kernel_name_based_data.hpp +++ b/sycl/source/detail/kernel_name_based_data.hpp @@ -85,12 +85,12 @@ struct FastKernelSubcacheT { // TODO Currently this class duplicates information fetched from the program // manager. Instead, we should merge all of the kernel name based information // into this structure and get rid of the other KernelName -> * maps. -class KernelNameBasedData { +class DeviceKernelInfo { public: #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - KernelNameBasedData() = default; + DeviceKernelInfo() = default; #endif - KernelNameBasedData(KernelNameStrRefT KernelName); + DeviceKernelInfo(KernelNameStrRefT KernelName); void init(KernelNameStrRefT KernelName); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 13e6971330bb..1d7b20416252 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1084,7 +1084,7 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, FastKernelCacheValPtr ProgramManager::getOrCreateKernel( context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, KernelNameBasedData &KernelNameBasedData, + KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl @@ -1095,7 +1095,7 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); if (SYCLConfig::get()) { if (auto KernelCacheValPtr = Cache.tryToGetKernelFast( - KernelName, UrDevice, KernelNameBasedData.getKernelSubcache())) { + KernelName, UrDevice, DeviceKernelInfo.getKernelSubcache())) { return KernelCacheValPtr; } } @@ -1148,7 +1148,7 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( KernelArgMaskPair.first.retain(), &(BuildResult->MBuildResultMutex), KernelArgMaskPair.second, std::move(Program), ContextImpl.getAdapter()); Cache.saveKernel(KernelName, UrDevice, ret_val, - KernelNameBasedData.getKernelSubcache()); + DeviceKernelInfo.getKernelSubcache()); return ret_val; } @@ -1820,7 +1820,7 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { return {}; } -KernelNameBasedData * +DeviceKernelInfo * ProgramManager::getOrCreateKernelNameBasedData(KernelNameStrRefT KernelName) { auto Result = m_KernelNameBasedDataMap.try_emplace(KernelName, KernelName); return &Result.first->second; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 42cfaa2c6fdf..71f05e6e04df 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -201,7 +201,7 @@ class ProgramManager { FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, - KernelNameBasedData &KernelNameBasedData, + DeviceKernelInfo &DeviceKernelInfo, const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( @@ -376,7 +376,7 @@ class ProgramManager { std::optional kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; - KernelNameBasedData * + DeviceKernelInfo * getOrCreateKernelNameBasedData(KernelNameStrRefT KernelName); std::set @@ -537,7 +537,7 @@ class ProgramManager { // Map for storing kernel name based data. Runtime lookup should be avoided // by caching the pointers when possible. - std::unordered_map + std::unordered_map m_KernelNameBasedDataMap; // Sanitizer type used in device image diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 85d3d80b3615..52005740a869 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1988,7 +1988,7 @@ std::string instrumentationGetKernelName( void instrumentationAddExtraKernelMetadata( xpti_td *&CmdTraceEvent, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, - KernelNameStrRefT KernelName, KernelNameBasedData &KernelNameBasedData, + KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, const std::shared_ptr &SyclKernel, queue_impl *Queue, std::vector &CGArgs) // CGArgs are not const since they could be // sorted in this function @@ -2015,7 +2015,7 @@ void instrumentationAddExtraKernelMetadata( FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImpl(), Queue->getDeviceImpl(), KernelName, - KernelNameBasedData); + DeviceKernelInfo); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2103,7 +2103,7 @@ std::pair emitKernelInstrumentationData( const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, const std::string_view SyclKernelName, - KernelNameBasedData &KernelNameBasedData, queue_impl *Queue, + DeviceKernelInfo &DeviceKernelInfo, queue_impl *Queue, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs) { @@ -2142,7 +2142,7 @@ std::pair emitKernelInstrumentationData( getQueueID(Queue)); instrumentationAddExtraKernelMetadata( CmdTraceEvent, NDRDesc, KernelBundleImplPtr, - std::string(SyclKernelName), KernelNameBasedData, SyclKernel, Queue, + std::string(SyclKernelName), DeviceKernelInfo, SyclKernel, Queue, CGArgs); xptiNotifySubscribers( @@ -2400,7 +2400,7 @@ static ur_result_t SetKernelParamsAndLaunch( const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, - KernelNameStrRefT KernelName, KernelNameBasedData &KernelNameBasedData, + KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, void *KernelFuncPtr = nullptr, int KernelNumArgs = 0, detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, bool KernelHasSpecialCaptures = true) { @@ -2447,7 +2447,7 @@ static ur_result_t SetKernelParamsAndLaunch( } const std::optional &ImplicitLocalArg = - KernelNameBasedData.getImplicitLocalArgPos(); + DeviceKernelInfo.getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, @@ -2666,7 +2666,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedData &KernelNameBasedData, + DeviceKernelInfo &DeviceKernelInfo, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, @@ -2713,7 +2713,7 @@ void enqueueImpKernel( KernelMutex = SyclKernelImpl->getCacheMutex(); } else { KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, KernelName, KernelNameBasedData, NDRDesc); + ContextImpl, DeviceImpl, KernelName, DeviceKernelInfo, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -2760,7 +2760,7 @@ void enqueueImpKernel( Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, OutEventImpl, EliminatedArgMask, getMemAllocationFunc, KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, - BinImage, KernelName, KernelNameBasedData, KernelFuncPtr, KernelNumArgs, + BinImage, KernelName, DeviceKernelInfo, KernelFuncPtr, KernelNumArgs, KernelParamDescGetter, KernelHasSpecialCaptures); } if (UR_RESULT_SUCCESS != Error) { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8fc7456e59b8..046764dcd705 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -628,7 +628,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedData &KernelNameBasedData, + DeviceKernelInfo &DeviceKernelInfo, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, @@ -694,7 +694,7 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - std::string_view SyclKernelName, KernelNameBasedData &KernelNameBasedData, + std::string_view SyclKernelName, DeviceKernelInfo &DeviceKernelInfo, queue_impl *Queue, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1699940c6f47..17b06c2cc676 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2608,13 +2608,13 @@ void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, void handler::setKernelNameBasedCachePtr( sycl::detail::KernelNameBasedCacheT *KernelNameBasedCachePtr) { setKernelNameBasedDataPtr( - reinterpret_cast( + reinterpret_cast( KernelNameBasedCachePtr)); } #endif void handler::setKernelNameBasedDataPtr( - sycl::detail::KernelNameBasedData *KernelNameBasedDataPtr) { + sycl::detail::DeviceKernelInfo *KernelNameBasedDataPtr) { impl->MKernelNameBasedDataPtr = KernelNameBasedDataPtr; } diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index de33de9ef99d..62da140648d0 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -62,7 +62,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { } std::unordered_map & + sycl::detail::DeviceKernelInfo> & getKernelNameBasedDataMap() { return m_KernelNameBasedDataMap; } From a4038736665192cb60686243a8418e817ca43c88 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 11:32:21 -0700 Subject: [PATCH 08/25] Revert ABI dumps --- sycl/test/abi/sycl_symbols_linux.dump | 4 +--- sycl/test/abi/sycl_symbols_windows.dump | 12 +++++------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a9bb5003f977..f41c07ee394b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3334,7 +3334,6 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE -_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKNS0_4spanIcLm18446744073709551615EEENS0_12bundle_stateE @@ -3347,7 +3346,6 @@ _ZN4sycl3_V16detail22removeDuplicateDevicesERKSt6vectorINS0_6deviceESaIS3_EE _ZN4sycl3_V16detail23constructorNotificationEPvS2_NS0_6access6targetENS3_4modeERKNS1_13code_locationE _ZN4sycl3_V16detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE _ZN4sycl3_V16detail26createKernelNameBasedCacheEv -_ZN4sycl3_V16detail26getKernelNameBasedDataImplERKNS1_6stringE _ZN4sycl3_V16detail26isDeviceGlobalUsedInKernelEPKv _ZN4sycl3_V16detail27getPixelCoordLinearFiltModeENS0_3vecIfLi4EEENS0_15addressing_modeENS0_5rangeILi3EEERS3_ _ZN4sycl3_V16detail28SampledImageAccessorBaseHost10getAccDataEv @@ -3607,7 +3605,6 @@ _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb -_ZN4sycl3_V17handler25setKernelNameBasedDataPtrEPNS0_6detail19KernelNameBasedDataE _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi @@ -3853,6 +3850,7 @@ _ZNK4sycl3_V16detail19kernel_bundle_plain5emptyEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost6getPtrEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv +_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost10getSamplerEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost12getNumOfDimsEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 28db6c2cd4cf..8d6235784776 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -229,9 +229,9 @@ ??$get_info_impl@Unative_vector_width_int@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_long@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_short@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ -??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_args@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ +??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uopencl_c_version@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uparent_device@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Upartition_affinity_domains@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4partition_affinity_domain@info@_V1@sycl@@V?$allocator@W4partition_affinity_domain@info@_V1@sycl@@@std@@@std@@XZ @@ -3841,7 +3841,7 @@ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z ?copyCodeLoc@handler@_V1@sycl@@AEAAXAEBV123@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAVKernelNameBasedCacheT@123@XZ +?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAUKernelNameBasedCacheT@123@XZ ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@PEAX_KAEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4073,7 +4073,6 @@ ?getEndTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ ?getKernelBundle@handler@_V1@sycl@@AEBA?AV?$kernel_bundle@$0A@@23@XZ ?getKernelName@handler@_V1@sycl@@AEAA?AVstring@detail@23@XZ -?getKernelNameBasedDataImpl@detail@_V1@sycl@@YAPEAVKernelNameBasedData@123@AEBVstring@123@@Z ?getMaxWorkGroups@handler@_V1@sycl@@AEAA?AV?$optional@V?$array@_K$02@std@@@std@@XZ ?getMaxWorkGroups_v2@handler@_V1@sycl@@AEAA?AV?$tuple@V?$array@_K$02@std@@_N@std@@XZ ?getMemoryObject@AccessorBaseHost@detail@_V1@sycl@@QEBAPEAXXZ @@ -4189,8 +4188,8 @@ ?get_impl@handler@_V1@sycl@@AEAAPEAVhandler_impl@detail@23@XZ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ -?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z @@ -4418,8 +4417,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAVKernelNameBasedCacheT@detail@23@@Z -?setKernelNameBasedDataPtr@handler@_V1@sycl@@AEAAXPEAVKernelNameBasedData@detail@23@@Z +?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z @@ -4431,8 +4429,8 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ From f96055bceaa1526df56d9b5095dbe1072c30a6b3 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 13:08:33 -0700 Subject: [PATCH 09/25] Switch to `getDeviceKernelInfo()` --- .../sycl/detail/get_kernel_name_based_data.hpp | 14 -------------- sycl/include/sycl/detail/kernel_desc.hpp | 2 +- sycl/include/sycl/handler.hpp | 3 +-- sycl/source/detail/get_kernel_name_based_data.cpp | 7 +++---- .../Extensions/WorkGroupMemoryBackendArgument.cpp | 4 +++- sycl/unittests/helpers/MockKernelInfo.hpp | 6 ++++++ 6 files changed, 14 insertions(+), 22 deletions(-) diff --git a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp index 6a38b134b695..abc728cd057e 100644 --- a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp +++ b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp @@ -19,20 +19,6 @@ class KernelNameBasedCacheT; __SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); #endif -class DeviceKernelInfo; - -__SYCL_EXPORT DeviceKernelInfo * -getKernelNameBasedDataImpl(detail::ABINeutralKernelNameStrRefT KernelName); - -// Retrieves and caches a data pointer to avoid kernel name based lookup -// overhead. -template -DeviceKernelInfo * -getKernelNameBasedData(detail::ABINeutralKernelNameStrRefT KernelName) { - static DeviceKernelInfo *Instance = getKernelNameBasedDataImpl(KernelName); - return Instance; -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index 9062d02197a8..2de046eb136b 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -291,7 +291,7 @@ struct CompileTimeKernelInfoTy { unsigned LineNumber = 0; unsigned ColumnNumber = 0; int64_t KernelSize; - kernel_param_desc_t &(*ParamDescGetter)(int) = nullptr; + kernel_param_desc_t (*ParamDescGetter)(int) = nullptr; bool HasSpecialCaptures = false; }; diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index c5945fc00a99..6ca04164d9af 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -863,8 +863,7 @@ class __SYCL_EXPORT handler { constexpr std::string_view KernelNameStr = detail::getKernelName(); MKernelName = KernelNameStr; - setKernelNameBasedDataPtr( - detail::getKernelNameBasedData(KernelNameStr)); + setKernelNameBasedDataPtr(&detail::getDeviceKernelInfo()); } else { // In case w/o the integration header it is necessary to process // accessors from the list(which are associated with this handler) as diff --git a/sycl/source/detail/get_kernel_name_based_data.cpp b/sycl/source/detail/get_kernel_name_based_data.cpp index 9c299f6ac299..cc6ccf236713 100644 --- a/sycl/source/detail/get_kernel_name_based_data.cpp +++ b/sycl/source/detail/get_kernel_name_based_data.cpp @@ -20,10 +20,9 @@ KernelNameBasedCacheT *createKernelNameBasedCache() { } #endif -DeviceKernelInfo * -getKernelNameBasedDataImpl(detail::ABINeutralKernelNameStrRefT KernelName) { - return ProgramManager::getInstance().getOrCreateKernelNameBasedData( - KernelName.data()); +DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { + return *ProgramManager::getInstance().getOrCreateKernelNameBasedData( + Info.Name); } } // namespace detail diff --git a/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp b/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp index 8febd9676fb9..97159ba3a278 100644 --- a/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp +++ b/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -20,7 +21,8 @@ using arg_type = syclext::work_group_memory; namespace sycl { inline namespace _V1 { namespace detail { -template <> struct KernelInfo { +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr unsigned getNumParams() { return 1; } static constexpr const detail::kernel_param_desc_t &getParamDesc(int) { return WorkGroupMemory; diff --git a/sycl/unittests/helpers/MockKernelInfo.hpp b/sycl/unittests/helpers/MockKernelInfo.hpp index 836346eddd11..2908a91624ce 100644 --- a/sycl/unittests/helpers/MockKernelInfo.hpp +++ b/sycl/unittests/helpers/MockKernelInfo.hpp @@ -24,6 +24,12 @@ struct MockKernelInfoBase { static constexpr bool callsThisItem() { return false; } static constexpr bool callsAnyThisFreeFunction() { return false; } static constexpr int64_t getKernelSize() { return 1; } + + static constexpr const char *getFileName() { return ""; } + static constexpr const char *getFunctionName() { return ""; } + static constexpr unsigned getLineNumber() { return 0; } + static constexpr unsigned getColumnNumber() { return 0; } + }; } // namespace unittest From 290893503d73ac99ce4f3b733f37a8f558a8594b Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 13:17:13 -0700 Subject: [PATCH 10/25] Update program_manager's signature for getOrCreateDeviceKernelInfo --- sycl/source/detail/get_kernel_name_based_data.cpp | 3 +-- sycl/source/detail/program_manager/program_manager.cpp | 6 +++--- sycl/source/detail/program_manager/program_manager.hpp | 7 +++++-- sycl/source/handler.cpp | 2 +- sycl/unittests/program_manager/Cleanup.cpp | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/get_kernel_name_based_data.cpp b/sycl/source/detail/get_kernel_name_based_data.cpp index cc6ccf236713..5ac4b77243d9 100644 --- a/sycl/source/detail/get_kernel_name_based_data.cpp +++ b/sycl/source/detail/get_kernel_name_based_data.cpp @@ -21,8 +21,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() { #endif DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { - return *ProgramManager::getInstance().getOrCreateKernelNameBasedData( - Info.Name); + return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info.Name); } } // namespace detail diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 1d7b20416252..012d1499cf4d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1820,10 +1820,10 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { return {}; } -DeviceKernelInfo * -ProgramManager::getOrCreateKernelNameBasedData(KernelNameStrRefT KernelName) { +DeviceKernelInfo & +ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { auto Result = m_KernelNameBasedDataMap.try_emplace(KernelName, KernelName); - return &Result.first->second; + return Result.first->second; } static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 71f05e6e04df..e3bbf5037510 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -376,8 +376,11 @@ class ProgramManager { std::optional kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; - DeviceKernelInfo * - getOrCreateKernelNameBasedData(KernelNameStrRefT KernelName); + DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName); + DeviceKernelInfo & + getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { + return getOrCreateDeviceKernelInfo(Info.Name); + } std::set getRawDeviceImages(const std::vector &KernelIDs); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 17b06c2cc676..0b2c027f72ed 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -547,7 +547,7 @@ event handler::finalize() { // Fetch the kernel name based data pointer if it hasn't been set (e.g. // in kernel bundle or free function cases). impl->MKernelNameBasedDataPtr = - detail::ProgramManager::getInstance().getOrCreateKernelNameBasedData( + &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( toKernelNameStrT(MKernelName)); } // If there were uses of set_specialization_constant build the kernel_bundle diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 62da140648d0..07509cf2e35d 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -378,7 +378,7 @@ TEST(ImageRemoval, BaseContainers) { std::vector KernelNames = generateRefNames({"A", "B", "C"}, "Kernel"); for (const std::string &Name : KernelNames) - PM.getOrCreateKernelNameBasedData(Name); + PM.getOrCreateDeviceKernelInfo(Name); checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), {"A", "B", "C"}, "check failed before removal"); @@ -403,7 +403,7 @@ TEST(ImageRemoval, MultipleImagesPerEntry) { TestBinaries); std::string KernelName = generateRefName("A", "Kernel"); - PM.getOrCreateKernelNameBasedData(KernelName); + PM.getOrCreateDeviceKernelInfo(KernelName); checkAllInvolvedContainers( PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(), /*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal", From cceffaf77b734154bd1b6110c9512c4e8e50ddbb Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 15:05:50 -0700 Subject: [PATCH 11/25] Drop `sycl/detail/get_kernel_name_based_data.hpp` --- .../detail/get_kernel_name_based_data.hpp | 24 ------------------- sycl/include/sycl/detail/kernel_desc.hpp | 5 ++++ sycl/include/sycl/handler.hpp | 1 - .../detail/get_kernel_name_based_data.cpp | 3 ++- .../include_deps/sycl_detail_core.hpp.cpp | 3 +-- 5 files changed, 8 insertions(+), 28 deletions(-) delete mode 100644 sycl/include/sycl/detail/get_kernel_name_based_data.hpp diff --git a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp b/sycl/include/sycl/detail/get_kernel_name_based_data.hpp deleted file mode 100644 index abc728cd057e..000000000000 --- a/sycl/include/sycl/detail/get_kernel_name_based_data.hpp +++ /dev/null @@ -1,24 +0,0 @@ -//==--------------------- get_kernel_name_based_data.hpp -------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES -class KernelNameBasedCacheT; -__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); -#endif - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index 2de046eb136b..95f3ab66d657 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -319,6 +319,11 @@ template DeviceKernelInfo &getDeviceKernelInfo() { return Info; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +class KernelNameBasedCacheT; +__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6ca04164d9af..1f4b341b132d 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/source/detail/get_kernel_name_based_data.cpp b/sycl/source/detail/get_kernel_name_based_data.cpp index 5ac4b77243d9..fe74393da0d9 100644 --- a/sycl/source/detail/get_kernel_name_based_data.cpp +++ b/sycl/source/detail/get_kernel_name_based_data.cpp @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// +#include + #include #include -#include namespace sycl { inline namespace _V1 { diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 64b481940fa4..e03d3eae08c0 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -130,8 +130,6 @@ // CHECK-NEXT: CL/cl_version.h // CHECK-NEXT: CL/cl_platform.h // CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_kernel_name_based_data.hpp -// CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp @@ -140,6 +138,7 @@ // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp +// CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/reduction_forward.hpp // CHECK-NEXT: detail/ur.hpp // CHECK-NEXT: ur_api_funcs.def From 7fa251993533badf0bfcd93b65997bd45f9c2121 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 15:30:18 -0700 Subject: [PATCH 12/25] class DeviceKernelInfo : public CompileTimeKernelInfoTy --- sycl/include/sycl/detail/kernel_desc.hpp | 20 +++++++++---------- .../detail/get_kernel_name_based_data.cpp | 2 +- sycl/source/detail/kernel_name_based_data.cpp | 7 ++++--- sycl/source/detail/kernel_name_based_data.hpp | 5 +++-- .../program_manager/program_manager.cpp | 5 +++-- .../program_manager/program_manager.hpp | 7 ++++--- 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index 95f3ab66d657..a825e920e04f 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -283,16 +283,16 @@ inline namespace compile_time_kernel_info_v1 { // at least for as long as we support user apps built with GNU libstdc++'s // pre-C++11 ABI. struct CompileTimeKernelInfoTy { - const char *Name = nullptr; - unsigned NumParams = 0; - bool IsESIMD = false; - const char *FileName = ""; - const char *FunctionName = ""; - unsigned LineNumber = 0; - unsigned ColumnNumber = 0; - int64_t KernelSize; - kernel_param_desc_t (*ParamDescGetter)(int) = nullptr; - bool HasSpecialCaptures = false; + const char * const Name = nullptr; + const unsigned NumParams = 0; + const bool IsESIMD = false; + const char * const FileName = ""; + const char *const FunctionName = ""; + const unsigned LineNumber = 0; + const unsigned ColumnNumber = 0; + const int64_t KernelSize = 0; + kernel_param_desc_t (*const ParamDescGetter)(int) = nullptr; + const bool HasSpecialCaptures = false; }; template diff --git a/sycl/source/detail/get_kernel_name_based_data.cpp b/sycl/source/detail/get_kernel_name_based_data.cpp index fe74393da0d9..5cc54bc253a4 100644 --- a/sycl/source/detail/get_kernel_name_based_data.cpp +++ b/sycl/source/detail/get_kernel_name_based_data.cpp @@ -22,7 +22,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() { #endif DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { - return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info.Name); + return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info); } } // namespace detail diff --git a/sycl/source/detail/kernel_name_based_data.cpp b/sycl/source/detail/kernel_name_based_data.cpp index 4ec499403f66..cd7d191a4ee4 100644 --- a/sycl/source/detail/kernel_name_based_data.cpp +++ b/sycl/source/detail/kernel_name_based_data.cpp @@ -12,8 +12,9 @@ namespace sycl { inline namespace _V1 { namespace detail { -DeviceKernelInfo::DeviceKernelInfo(KernelNameStrRefT KernelName) { - init(KernelName); +DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) + : CompileTimeKernelInfoTy(Info) { + init(Name); } void DeviceKernelInfo::init(KernelNameStrRefT KernelName) { @@ -53,4 +54,4 @@ void DeviceKernelInfo::assertInitialized() { } // namespace detail } // namespace _V1 -} // namespace sycl \ No newline at end of file +} // namespace sycl diff --git a/sycl/source/detail/kernel_name_based_data.hpp b/sycl/source/detail/kernel_name_based_data.hpp index 0081bae3929e..02c71fd75ab5 100644 --- a/sycl/source/detail/kernel_name_based_data.hpp +++ b/sycl/source/detail/kernel_name_based_data.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -85,12 +86,12 @@ struct FastKernelSubcacheT { // TODO Currently this class duplicates information fetched from the program // manager. Instead, we should merge all of the kernel name based information // into this structure and get rid of the other KernelName -> * maps. -class DeviceKernelInfo { +class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: #ifndef __INTEL_PREVIEW_BREAKING_CHANGES DeviceKernelInfo() = default; #endif - DeviceKernelInfo(KernelNameStrRefT KernelName); + DeviceKernelInfo(const CompileTimeKernelInfoTy &Info); void init(KernelNameStrRefT KernelName); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 012d1499cf4d..182d0fc6dd70 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1821,8 +1821,9 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { } DeviceKernelInfo & -ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { - auto Result = m_KernelNameBasedDataMap.try_emplace(KernelName, KernelName); +ProgramManager::getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { + auto Result = + m_KernelNameBasedDataMap.try_emplace(KernelNameStrT{Info.Name}, Info); return Result.first->second; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index e3bbf5037510..b0f9552febbd 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -376,10 +376,11 @@ class ProgramManager { std::optional kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; - DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName); DeviceKernelInfo & - getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { - return getOrCreateDeviceKernelInfo(Info.Name); + getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { + return getOrCreateDeviceKernelInfo( + CompileTimeKernelInfoTy{KernelName.data()}); } std::set From ed389b681d70328beb278a9c899e4eedde345f8d Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 21 Aug 2025 15:49:12 -0700 Subject: [PATCH 13/25] Drop ABINeutralKernelNameStrRefT --- sycl/include/sycl/detail/kernel_name_str_t.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_name_str_t.hpp b/sycl/include/sycl/detail/kernel_name_str_t.hpp index b8ceb395431d..e0079ffb09c7 100644 --- a/sycl/include/sycl/detail/kernel_name_str_t.hpp +++ b/sycl/include/sycl/detail/kernel_name_str_t.hpp @@ -18,12 +18,10 @@ namespace detail { using KernelNameStrT = std::string_view; using KernelNameStrRefT = std::string_view; using ABINeutralKernelNameStrT = detail::string_view; -using ABINeutralKernelNameStrRefT = detail::string_view; #else using KernelNameStrT = std::string; using KernelNameStrRefT = const std::string &; using ABINeutralKernelNameStrT = detail::string; -using ABINeutralKernelNameStrRefT = const detail::string &; #endif inline KernelNameStrT toKernelNameStrT(const ABINeutralKernelNameStrT &str) { From 66f4928499810919d0d1b09df89d2f837786e22f Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 22 Aug 2025 08:24:55 -0700 Subject: [PATCH 14/25] Adjust unit tests --- .../Extensions/CommandGraph/CommandGraph.cpp | 15 +++-- .../CommandGraph/CommonReferenceSemantics.cpp | 21 +++++-- .../Extensions/CommandGraph/Exceptions.cpp | 55 ++++++++++++------- sycl/unittests/helpers/TestKernel.hpp | 33 ++++++++++- 4 files changed, 91 insertions(+), 33 deletions(-) diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 82fc0ed56c09..dedd4ebbcb40 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -9,6 +9,13 @@ #include +class Kernel1; +class Kernel2; +class Kernel3; +MOCK_INTEGRATION_HEADER(Kernel1) +MOCK_INTEGRATION_HEADER(Kernel2) +MOCK_INTEGRATION_HEADER(Kernel3) + using namespace sycl; using namespace sycl::ext::oneapi; @@ -630,7 +637,7 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { Graph1.begin_recording(Q1); - auto GraphEvent1 = Q1.single_task([=] {}); + auto GraphEvent1 = Q1.single_task([=] {}); ASSERT_EQ(Q1.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::recording); ASSERT_EQ(Q2.ext_oneapi_get_state(), @@ -638,7 +645,7 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { ASSERT_EQ(Q3.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::executing); - auto GraphEvent2 = Q2.single_task(GraphEvent1, [=] {}); + auto GraphEvent2 = Q2.single_task(GraphEvent1, [=] {}); ASSERT_EQ(Q1.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::recording); ASSERT_EQ(Q2.ext_oneapi_get_state(), @@ -646,8 +653,8 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { ASSERT_EQ(Q3.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::executing); - auto GraphEvent3 = Q3.parallel_for(range<1>{1024}, GraphEvent1, - [=](item<1> Id) {}); + auto GraphEvent3 = + Q3.parallel_for(range<1>{1024}, GraphEvent1, [=](item<1> Id) {}); ASSERT_EQ(Q1.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::recording); ASSERT_EQ(Q2.ext_oneapi_get_state(), diff --git a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp index c6a9333cb02a..d6fd4d1ec5e4 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp @@ -12,6 +12,9 @@ using namespace sycl; using namespace sycl::ext::oneapi; +class MockKernel; +MOCK_INTEGRATION_HEADER(MockKernel) + /** * Checks that the operators and constructors of graph related classes meet the * common reference semantics. @@ -70,8 +73,9 @@ TEST_F(CommandGraphTest, NodeSemantics) { experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); auto Factory = [&]() { - return Graph.add( - [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }); + return Graph.add([&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }); }; ASSERT_NO_FATAL_FAILURE(testSemantics(Factory)); } @@ -80,7 +84,9 @@ TEST_F(CommandGraphTest, DynamicCGSemantics) { sycl::queue Queue; experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); - auto CGF = [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }; + auto CGF = [&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }; auto Factory = [&]() { return experimental::dynamic_command_group(Graph, {CGF}); @@ -185,8 +191,9 @@ TEST_F(CommandGraphTest, NodeHash) { experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); auto Factory = [&]() { - return Graph.add( - [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }); + return Graph.add([&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }); }; ASSERT_NO_FATAL_FAILURE(testHash(Factory)); } @@ -195,7 +202,9 @@ TEST_F(CommandGraphTest, DynamicCommandGroupHash) { sycl::queue Queue; experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); - auto CGF = [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }; + auto CGF = [&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }; auto Factory = [&]() { return experimental::dynamic_command_group(Graph, {CGF}); diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 42444dff8289..1a635a751229 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include #include "Common.hpp" @@ -346,7 +347,7 @@ TEST_F(CommandGraphTest, Reductions) { { try { Graph.add([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( range<1>{1}, reduction(&ReduVar, int{0}, sycl::plus<>()), [=](item<1> idx, auto &Sum) {}); }); @@ -366,7 +367,7 @@ TEST_F(CommandGraphTest, Streams) { try { Graph.add([&](handler &CGH) { sycl::stream Out(WorkItems * 16, 16, CGH); - CGH.parallel_for( + CGH.parallel_for( range<1>(WorkItems), [=](item<1> id) { Out << id.get_linear_id() << sycl::endl; }); }); @@ -422,7 +423,7 @@ TEST_F(CommandGraphTest, WorkGroupScratchMemoryCheck) { { try { Graph.add([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( range<1>{1}, ext::oneapi::experimental::properties{ ext::oneapi::experimental::work_group_scratch_size( @@ -679,11 +680,11 @@ TEST_F(CommandGraphTest, TransitiveRecordingWrongContext) { Graph.begin_recording(Q1); auto GraphEvent1 = - Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); ASSERT_THROW(Q2.submit([&](handler &CGH) { CGH.depends_on(GraphEvent1); - CGH.single_task([=] {}); + CGH.single_task([=] {}); }), sycl::exception); } @@ -710,11 +711,11 @@ TEST_F(CommandGraphTest, TransitiveRecordingWrongDevice) { Graph.begin_recording(Q1); auto GraphEvent1 = - Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); ASSERT_THROW(Q2.submit([&](handler &CGH) { CGH.depends_on(GraphEvent1); - CGH.single_task([=] {}); + CGH.single_task([=] {}); }), sycl::exception); } @@ -736,11 +737,11 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { Graph2.begin_recording(Q2); auto GraphEvent1 = - Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); ASSERT_THROW(Q2.submit([&](handler &CGH) { CGH.depends_on(GraphEvent1); - CGH.single_task([=] {}); + CGH.single_task([=] {}); }), sycl::exception); } @@ -779,23 +780,27 @@ TEST_F(CommandGraphTest, DynamicCommandGroupMismatchEventEdges) { Graph.begin_recording(Queue); auto EventA = Queue.submit([&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); }); auto EventB = Queue.submit([&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); }); Graph.end_recording(); auto CGFA = [&](handler &CGH) { CGH.depends_on(EventA); - CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); }; auto CGFB = [&](handler &CGH) { CGH.depends_on(EventB); - CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); @@ -817,12 +822,14 @@ TEST_F(CommandGraphTest, DynamicCommandGroupBufferThrows) { auto CGFA = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { Acc[Item.get_id()] = 2; }); }; auto CGFB = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); @@ -846,12 +853,14 @@ TEST_F(CommandGraphTest, DynamicCommandGroupBufferHostAccThrows) { {experimental::property::graph::assume_buffer_outlives_graph{}}}; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { Ptr[Item.get_id()] = 2; }); }; auto CGFB = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); @@ -882,24 +891,28 @@ TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) { Queue.submit([&](handler &CGH) { auto AccA = BufA.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); }); Queue.submit([&](handler &CGH) { auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); }); Graph.end_recording(); auto CGFA = [&](handler &CGH) { auto AccA = BufA.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); }; auto CGFB = [&](handler &CGH) { auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); diff --git a/sycl/unittests/helpers/TestKernel.hpp b/sycl/unittests/helpers/TestKernel.hpp index 85e6f28c5f67..2bf3e7a04301 100644 --- a/sycl/unittests/helpers/TestKernel.hpp +++ b/sycl/unittests/helpers/TestKernel.hpp @@ -10,9 +10,12 @@ #include "MockDeviceImage.hpp" #include "MockKernelInfo.hpp" +#include class TestKernel; class TestKernelWithAcc; +class TestKernelWithStream; +class TestKernelWithPtr; namespace sycl { inline namespace _V1 { @@ -44,11 +47,37 @@ struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr unsigned getColumnNumber() { return 8; } }; +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestKernelWithStream"; } + static constexpr int64_t getKernelSize() { return sizeof(sycl::stream); } + static constexpr const char *getFileName() { return "TestKernel.hpp"; } + static constexpr const char *getFunctionName() { + return "TestKernelWithStreamFunctionName"; + } + static constexpr unsigned getLineNumber() { return 15; } + static constexpr unsigned getColumnNumber() { return 8; } +}; + +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestKernelWithPtr"; } + static constexpr int64_t getKernelSize() { return sizeof(void *); } + static constexpr const char *getFileName() { return "TestKernel.hpp"; } + static constexpr const char *getFunctionName() { + return "TestKernelWithPtrFunctionName"; + } + static constexpr unsigned getLineNumber() { return 16; } + static constexpr unsigned getColumnNumber() { return 8; } +}; + } // namespace detail } // namespace _V1 } // namespace sycl static sycl::unittest::MockDeviceImage Imgs[] = { sycl::unittest::generateDefaultImage({"TestKernel"}), - sycl::unittest::generateDefaultImage({"TestKernelWithAcc"})}; -static sycl::unittest::MockDeviceImageArray<2> ImgArray{Imgs}; + sycl::unittest::generateDefaultImage({"TestKernelWithAcc"}), + sycl::unittest::generateDefaultImage({"TestKernelWithStream"}), + sycl::unittest::generateDefaultImage({"TestKernelWithPtr"})}; +static sycl::unittest::MockDeviceImageArray<4> ImgArray{Imgs}; From fa50f3f862788f13b122d75a3c75fe2d15d3362d Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 22 Aug 2025 08:34:20 -0700 Subject: [PATCH 15/25] Complete the renaming --- sycl/include/sycl/handler.hpp | 5 ++-- sycl/source/detail/cg.hpp | 9 +++--- sycl/source/detail/global_handler.cpp | 12 ++++---- sycl/source/detail/global_handler.hpp | 2 +- sycl/source/detail/graph/graph_impl.cpp | 5 ++-- sycl/source/detail/handler_impl.hpp | 4 +-- sycl/source/detail/kernel_name_based_data.hpp | 9 +++--- .../program_manager/program_manager.cpp | 4 +-- .../program_manager/program_manager.hpp | 7 ++--- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 13 ++++---- sycl/source/handler.cpp | 30 +++++++++---------- sycl/unittests/program_manager/Cleanup.cpp | 8 ++--- .../arg_mask/EliminatedArgMask.cpp | 2 +- .../scheduler/SchedulerTestUtils.hpp | 2 +- .../scheduler/StreamInitDependencyOnHost.cpp | 2 +- 16 files changed, 55 insertions(+), 61 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 1f4b341b132d..ee17b2d14ddb 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -862,7 +862,7 @@ class __SYCL_EXPORT handler { constexpr std::string_view KernelNameStr = detail::getKernelName(); MKernelName = KernelNameStr; - setKernelNameBasedDataPtr(&detail::getDeviceKernelInfo()); + setDeviceKernelInfoPtr(&detail::getDeviceKernelInfo()); } else { // In case w/o the integration header it is necessary to process // accessors from the list(which are associated with this handler) as @@ -3688,8 +3688,7 @@ class __SYCL_EXPORT handler { void setKernelNameBasedCachePtr( detail::KernelNameBasedCacheT *KernelNameBasedCachePtr); #endif - void setKernelNameBasedDataPtr( - detail::DeviceKernelInfo *KernelNameBasedDataPtr); + void setDeviceKernelInfoPtr(detail::DeviceKernelInfo *DeviceKernelInfoPtr); queue getQueue(); diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index c92f201699c1..e92f9b40eff4 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -254,7 +254,7 @@ class CGExecKernel : public CG { std::shared_ptr MKernelBundle; std::vector MArgs; KernelNameStrT MKernelName; - DeviceKernelInfo &MKernelNameBasedData; + DeviceKernelInfo &MDeviceKernelInfo; std::vector> MStreams; std::vector> MAuxiliaryResources; /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list @@ -269,8 +269,7 @@ class CGExecKernel : public CG { std::shared_ptr SyclKernel, std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, - KernelNameStrT KernelName, - DeviceKernelInfo &DeviceKernelInfo, + KernelNameStrT KernelName, DeviceKernelInfo &DeviceKernelInfo, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -279,8 +278,8 @@ class CGExecKernel : public CG { : CG(Type, std::move(CGData), std::move(loc)), MNDRDesc(NDRDesc), MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), - MKernelName(std::move(KernelName)), - MKernelNameBasedData(DeviceKernelInfo), MStreams(std::move(Streams)), + MKernelName(std::move(KernelName)), MDeviceKernelInfo(DeviceKernelInfo), + MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 61fc5d8a8914..7bdd1c489706 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -251,11 +251,11 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() { - static std::deque &KernelNameBasedDataStorage = - getOrCreate(MKernelNameBasedDataStorage); - LockGuard LG{MKernelNameBasedDataStorage.Lock}; + static std::deque &DeviceKernelInfoStorage = + getOrCreate(MDeviceKernelInfoStorage); + LockGuard LG{MDeviceKernelInfoStorage.Lock}; return reinterpret_cast( - &KernelNameBasedDataStorage.emplace_back()); + &DeviceKernelInfoStorage.emplace_back()); } #endif @@ -394,9 +394,9 @@ void shutdown_late() { Handler->MProgramManager.Inst.reset(nullptr); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - // Kernel cache, which is part of kernel name based data, + // Kernel cache, which is part of device kernel info, // stores handles to the adapter, so clear it before releasing adapters. - Handler->MKernelNameBasedDataStorage.Inst.reset(nullptr); + Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr); #endif // Clear the adapters and reset the instance if it was there. diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index aea730c5afe4..b4dbbd938ebb 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -138,7 +138,7 @@ class GlobalHandler { // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - InstWithLock> MKernelNameBasedDataStorage; + InstWithLock> MDeviceKernelInfoStorage; #endif }; } // namespace detail diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 79c699023f66..71abf3cd6c46 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -741,7 +741,7 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( CGExec->MLine, CGExec->MColumn); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, - CGExec->MKernelName.data(), CGExec->MKernelNameBasedData, nullptr, + CGExec->MKernelName.data(), CGExec->MDeviceKernelInfo, nullptr, CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs); if (CmdTraceEvent) sycl::detail::emitInstrumentationGeneral( @@ -1574,8 +1574,7 @@ void exec_graph_impl::populateURKernelUpdateStructs( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, ExecCG.MKernelName, - ExecCG.MKernelNameBasedData); + ContextImpl, DeviceImpl, ExecCG.MKernelName, ExecCG.MDeviceKernelInfo); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index b76cf0778be8..a8b217a1e64f 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -243,9 +243,9 @@ class handler_impl { bool MKernelIsESIMD = false; bool MKernelHasSpecialCaptures = true; - // A pointer to kernel name based data. Cached on the application side in + // A pointer to device kernel information. Cached on the application side in // headers or retrieved from program manager. - DeviceKernelInfo *MKernelNameBasedDataPtr = nullptr; + DeviceKernelInfo *MDeviceKernelInfoPtr = nullptr; }; } // namespace detail diff --git a/sycl/source/detail/kernel_name_based_data.hpp b/sycl/source/detail/kernel_name_based_data.hpp index 02c71fd75ab5..f98cd853c997 100644 --- a/sycl/source/detail/kernel_name_based_data.hpp +++ b/sycl/source/detail/kernel_name_based_data.hpp @@ -80,11 +80,12 @@ struct FastKernelSubcacheT { FastKernelSubcacheMutexT Mutex; }; -// This class is used for aggregating kernel name based information. -// Pointers to instances of this class are stored in header function templates -// as a static variable to avoid repeated runtime lookup overhead. +// This class aggregates information specific to device kernels (i.e. +// information that is uniform between different submissions of the same +// kernel). Pointers to instances of this class are stored in header function +// templates as a static variable to avoid repeated runtime lookup overhead. // TODO Currently this class duplicates information fetched from the program -// manager. Instead, we should merge all of the kernel name based information +// manager. Instead, we should merge all of this information // into this structure and get rid of the other KernelName -> * maps. class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 182d0fc6dd70..760e86528bcd 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1823,7 +1823,7 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { DeviceKernelInfo & ProgramManager::getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { auto Result = - m_KernelNameBasedDataMap.try_emplace(KernelNameStrT{Info.Name}, Info); + m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name}, Info); return Result.first->second; } @@ -2234,7 +2234,7 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // share lifetime. m_KernelUsesAssert.erase(Name); m_KernelImplicitLocalArgPos.erase(Name); - m_KernelNameBasedDataMap.erase(Name); + m_DeviceKernelInfoMap.erase(Name); m_KernelNameRefCount.erase(RefCountIt); if (Name2IDIt != m_KernelName2KernelIDs.end()) m_KernelName2KernelIDs.erase(Name2IDIt); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index b0f9552febbd..967561333b1a 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -459,7 +459,7 @@ class ProgramManager { /// Keeps track of binary image to kernel name reference count. /// Used for checking if the last image referencing the kernel name - /// is removed in order to trigger cleanup of kernel name based information. + /// is removed in order to trigger cleanup of kernel specific information. /// Access must be guarded by the m_KernelIDsMutex mutex. std::unordered_map m_KernelNameRefCount; @@ -539,10 +539,9 @@ class ProgramManager { KernelUsesAssertSet m_KernelUsesAssert; std::unordered_map m_KernelImplicitLocalArgPos; - // Map for storing kernel name based data. Runtime lookup should be avoided + // Map for storing device kernel information. Runtime lookup should be avoided // by caching the pointers when possible. - std::unordered_map - m_KernelNameBasedDataMap; + std::unordered_map m_DeviceKernelInfoMap; // Sanitizer type used in device image SanitizerType m_SanitizerFoundInImage; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 9f1a88115057..e3151ba680ea 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -871,7 +871,7 @@ class queue_impl : public std::enable_shared_from_this { // Kernel only uses assert if it's non interop one KernelUsesAssert = (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && - Handler.impl->MKernelNameBasedDataPtr->usesAssert(); + Handler.impl->MDeviceKernelInfoPtr->usesAssert(); auto &PostProcess = *PostProcessorFunc; PostProcess(IsKernel, KernelUsesAssert, Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 52005740a869..2fd94221e88b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2198,7 +2198,7 @@ void ExecCGCommand::emitInstrumentationData() { reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( CmdTraceEvent, KernelCG->MNDRDesc, KernelCG->getKernelBundle().get(), - KernelCG->MKernelName, KernelCG->MKernelNameBasedData, + KernelCG->MKernelName, KernelCG->MDeviceKernelInfo, KernelCG->MSyclKernel, MQueue.get(), KernelCG->MArgs); } @@ -2551,7 +2551,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName, - CommandGroup.MKernelNameBasedData); + CommandGroup.MDeviceKernelInfo); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; // To keep UrKernel valid, we return FastKernelCacheValPtr. @@ -3240,7 +3240,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (!EventImpl) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = (!SyclKernel || SyclKernel->hasSYCLMetadata()) && - ExecKernel->MKernelNameBasedData.usesAssert(); + ExecKernel->MDeviceKernelInfo.usesAssert(); if (KernelUsesAssert) { EventImpl = MEvent.get(); } @@ -3253,10 +3253,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } enqueueImpKernel( *MQueue, NDRDesc, Args, ExecKernel->getKernelBundle().get(), - SyclKernel.get(), KernelName, ExecKernel->MKernelNameBasedData, - RawEvents, EventImpl, getMemAllocationFunc, - ExecKernel->MKernelCacheConfig, ExecKernel->MKernelIsCooperative, - ExecKernel->MKernelUsesClusterLaunch, + SyclKernel.get(), KernelName, ExecKernel->MDeviceKernelInfo, RawEvents, + EventImpl, getMemAllocationFunc, ExecKernel->MKernelCacheConfig, + ExecKernel->MKernelIsCooperative, ExecKernel->MKernelUsesClusterLaunch, ExecKernel->MKernelWorkGroupMemorySize, BinImage); return UR_RESULT_SUCCESS; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 0b2c027f72ed..39396cbf2151 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -538,15 +538,14 @@ event handler::finalize() { } if (type == detail::CGType::Kernel) { - if (impl->MKernelNameBasedDataPtr) { + if (impl->MDeviceKernelInfoPtr) { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - impl->MKernelNameBasedDataPtr->initIfNeeded( - toKernelNameStrT(MKernelName)); + impl->MDeviceKernelInfoPtr->initIfNeeded(toKernelNameStrT(MKernelName)); #endif } else { - // Fetch the kernel name based data pointer if it hasn't been set (e.g. + // Fetch the device kernel info pointer if it hasn't been set (e.g. // in kernel bundle or free function cases). - impl->MKernelNameBasedDataPtr = + impl->MDeviceKernelInfoPtr = &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( toKernelNameStrT(MKernelName)); } @@ -624,7 +623,7 @@ event handler::finalize() { if (DiscardEvent) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && - impl->MKernelNameBasedDataPtr->usesAssert(); + impl->MDeviceKernelInfoPtr->usesAssert(); DiscardEvent = !KernelUsesAssert; } @@ -645,7 +644,7 @@ event handler::finalize() { StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, - MKernelName.data(), *impl->MKernelNameBasedDataPtr, + MKernelName.data(), *impl->MDeviceKernelInfoPtr, impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); detail::emitInstrumentationGeneral(StreamID, InstanceID, @@ -662,8 +661,8 @@ event handler::finalize() { enqueueImpKernel( impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), - *impl->MKernelNameBasedDataPtr, RawEvents, ResultEvent.get(), - nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, + *impl->MDeviceKernelInfoPtr, RawEvents, ResultEvent.get(), nullptr, + impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); @@ -723,7 +722,7 @@ event handler::finalize() { impl->MNDRDesc, std::move(MHostKernel), std::move(MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), toKernelNameStrT(MKernelName), - *impl->MKernelNameBasedDataPtr, std::move(MStreamStorage), + *impl->MDeviceKernelInfoPtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, @@ -2607,15 +2606,14 @@ void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, #ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelNameBasedCachePtr( sycl::detail::KernelNameBasedCacheT *KernelNameBasedCachePtr) { - setKernelNameBasedDataPtr( - reinterpret_cast( - KernelNameBasedCachePtr)); + setDeviceKernelInfoPtr(reinterpret_cast( + KernelNameBasedCachePtr)); } #endif -void handler::setKernelNameBasedDataPtr( - sycl::detail::DeviceKernelInfo *KernelNameBasedDataPtr) { - impl->MKernelNameBasedDataPtr = KernelNameBasedDataPtr; +void handler::setDeviceKernelInfoPtr( + sycl::detail::DeviceKernelInfo *DeviceKernelInfoPtr) { + impl->MDeviceKernelInfoPtr = DeviceKernelInfoPtr; } void handler::setKernelInfo( diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 07509cf2e35d..a75d02b6beff 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -63,8 +63,8 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { std::unordered_map & - getKernelNameBasedDataMap() { - return m_KernelNameBasedDataMap; + getDeviceKernelInfoMap() { + return m_DeviceKernelInfoMap; } std::unordered_map & @@ -313,9 +313,9 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, checkContainer(PM.getVFSet2BinImage(), ExpectedEntryCount, generateRefNames(ImgIds, "VF"), "VFSet2BinImage " + CommentPostfix); - checkContainer(PM.getKernelNameBasedDataMap(), ExpectedEntryCount, + checkContainer(PM.getDeviceKernelInfoMap(), ExpectedEntryCount, generateRefNames(ImgIds, "Kernel"), - "Kernel name based data map " + CommentPostfix); + "Device kernel info map " + CommentPostfix); checkContainer(PM.getKernelNameRefCount(), ExpectedEntryCount, generateRefNames(ImgIds, "Kernel"), "Kernel name reference count " + CommentPostfix); diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index ffdb73d10766..e6543927894a 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -148,7 +148,7 @@ class MockHandler : public sycl::handler { std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), - CGH->MKernelName.data(), *impl->MKernelNameBasedDataPtr, + CGH->MKernelName.data(), *impl->MDeviceKernelInfoPtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), impl->MCGType, {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 3704bb3822c7..c3bdb342170d 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -303,7 +303,7 @@ class MockHandlerCustomFinalize : public MockHandler { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), *impl->MKernelNameBasedDataPtr, getStreamStorage(), + getKernelName(), *impl->MDeviceKernelInfoPtr, getStreamStorage(), impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 0f928378425a..2b6d0cc56343 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -33,7 +33,7 @@ class MockHandlerStreamInit : public MockHandler { detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), - getArgs(), getKernelName(), *impl->MKernelNameBasedDataPtr, + getArgs(), getKernelName(), *impl->MDeviceKernelInfoPtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); From c2444de6f7d856d473a1c309348b97a7a8aa7598 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 22 Aug 2025 08:39:44 -0700 Subject: [PATCH 16/25] Update Linux ABI dump --- sycl/test/abi/sycl_symbols_linux.dump | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f41c07ee394b..e9f65ce66248 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3314,6 +3314,7 @@ _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi +_ZN4sycl3_V16detail19getDeviceKernelInfoERKNS1_27compile_time_kernel_info_v123CompileTimeKernelInfoTyE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewE @@ -3334,6 +3335,7 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE +_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKNS0_4spanIcLm18446744073709551615EEENS0_12bundle_stateE @@ -3590,6 +3592,7 @@ _ZN4sycl3_V17handler21setKernelWorkGroupMemEm _ZN4sycl3_V17handler21setUserFacingNodeTypeENS0_3ext6oneapi12experimental9node_typeE _ZN4sycl3_V17handler22ext_oneapi_fill2d_implEPvmPKvmmm _ZN4sycl3_V17handler22memcpyFromDeviceGlobalEPvPKvbmm +_ZN4sycl3_V17handler22setDeviceKernelInfoPtrEPNS0_6detail16DeviceKernelInfoE _ZN4sycl3_V17handler22setHandlerKernelBundleENS0_6kernelE _ZN4sycl3_V17handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE _ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi1EEE @@ -3850,7 +3853,6 @@ _ZNK4sycl3_V16detail19kernel_bundle_plain5emptyEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost6getPtrEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv -_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost10getSamplerEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost12getNumOfDimsEv From 87543f53f069fb43ca83e28a1cfdb30e0480da80 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 22 Aug 2025 08:44:51 -0700 Subject: [PATCH 17/25] Appease clang-format --- sycl/include/sycl/detail/kernel_desc.hpp | 4 ++-- sycl/source/detail/program_manager/program_manager.cpp | 4 ++-- sycl/source/detail/program_manager/program_manager.hpp | 10 +++++----- sycl/source/detail/scheduler/commands.cpp | 6 +++--- sycl/unittests/helpers/MockKernelInfo.hpp | 1 - 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index a825e920e04f..fed6a4dbff2f 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -283,10 +283,10 @@ inline namespace compile_time_kernel_info_v1 { // at least for as long as we support user apps built with GNU libstdc++'s // pre-C++11 ABI. struct CompileTimeKernelInfoTy { - const char * const Name = nullptr; + const char *const Name = nullptr; const unsigned NumParams = 0; const bool IsESIMD = false; - const char * const FileName = ""; + const char *const FileName = ""; const char *const FunctionName = ""; const unsigned LineNumber = 0; const unsigned ColumnNumber = 0; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 760e86528bcd..684954565f9e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1820,8 +1820,8 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { return {}; } -DeviceKernelInfo & -ProgramManager::getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { +DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( + const CompileTimeKernelInfoTy &Info) { auto Result = m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name}, Info); return Result.first->second; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 967561333b1a..c547efa024e8 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -198,11 +198,11 @@ class ProgramManager { const DevImgPlainWithDeps *DevImgWithDeps = nullptr, const SerializedObj &SpecConsts = {}); - FastKernelCacheValPtr - getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, - DeviceKernelInfo &DeviceKernelInfo, - const NDRDescT &NDRDesc = {}); + FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl, + device_impl &DeviceImpl, + KernelNameStrRefT KernelName, + DeviceKernelInfo &DeviceKernelInfo, + const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( KernelNameStrRefT KernelName, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2fd94221e88b..211794a95733 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2102,9 +2102,9 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - const std::string_view SyclKernelName, - DeviceKernelInfo &DeviceKernelInfo, queue_impl *Queue, - const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, + const std::string_view SyclKernelName, DeviceKernelInfo &DeviceKernelInfo, + queue_impl *Queue, const NDRDescT &NDRDesc, + detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs) { auto XptiObjects = std::make_pair(nullptr, -1); diff --git a/sycl/unittests/helpers/MockKernelInfo.hpp b/sycl/unittests/helpers/MockKernelInfo.hpp index 2908a91624ce..fd1b1ed9435a 100644 --- a/sycl/unittests/helpers/MockKernelInfo.hpp +++ b/sycl/unittests/helpers/MockKernelInfo.hpp @@ -29,7 +29,6 @@ struct MockKernelInfoBase { static constexpr const char *getFunctionName() { return ""; } static constexpr unsigned getLineNumber() { return 0; } static constexpr unsigned getColumnNumber() { return 0; } - }; } // namespace unittest From 035112578989e3d803c7678c59db69f4166910e0 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 22 Aug 2025 09:21:10 -0700 Subject: [PATCH 18/25] Rename files --- sycl/source/CMakeLists.txt | 4 ++-- .../{kernel_name_based_data.cpp => device_kernel_info.cpp} | 4 ++-- .../{kernel_name_based_data.hpp => device_kernel_info.hpp} | 2 +- ..._kernel_name_based_data.cpp => get_device_kernel_info.cpp} | 2 +- sycl/source/detail/global_handler.cpp | 2 +- sycl/source/detail/kernel_program_cache.hpp | 2 +- sycl/source/detail/program_manager/program_manager.hpp | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) rename sycl/source/detail/{kernel_name_based_data.cpp => device_kernel_info.cpp} (92%) rename sycl/source/detail/{kernel_name_based_data.hpp => device_kernel_info.hpp} (98%) rename sycl/source/detail/{get_kernel_name_based_data.cpp => get_device_kernel_info.cpp} (91%) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 93bb4b3a6812..38ffd232fcbb 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -288,8 +288,8 @@ set(SYCL_COMMON_SOURCES "detail/kernel_compiler/kernel_compiler_opencl.cpp" "detail/kernel_compiler/kernel_compiler_sycl.cpp" "detail/kernel_impl.cpp" - "detail/get_kernel_name_based_data.cpp" - "detail/kernel_name_based_data.cpp" + "detail/get_device_kernel_info.cpp" + "detail/device_kernel_info.cpp" "detail/kernel_program_cache.cpp" "detail/memory_export.cpp" "detail/memory_manager.cpp" diff --git a/sycl/source/detail/kernel_name_based_data.cpp b/sycl/source/detail/device_kernel_info.cpp similarity index 92% rename from sycl/source/detail/kernel_name_based_data.cpp rename to sycl/source/detail/device_kernel_info.cpp index cd7d191a4ee4..d8be7e64ce6d 100644 --- a/sycl/source/detail/kernel_name_based_data.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -1,11 +1,11 @@ -//==---------------------- kernel_name_based_data.cpp ----------------------==// +//==---------------------- device_kernel_info.cpp ----------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include +#include #include namespace sycl { diff --git a/sycl/source/detail/kernel_name_based_data.hpp b/sycl/source/detail/device_kernel_info.hpp similarity index 98% rename from sycl/source/detail/kernel_name_based_data.hpp rename to sycl/source/detail/device_kernel_info.hpp index f98cd853c997..b204ab6c63eb 100644 --- a/sycl/source/detail/kernel_name_based_data.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -1,4 +1,4 @@ -//==---------------------- kernel_name_based_data.hpp ----------------------==// +//==---------------------- device_kernel_info.hpp ----------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/source/detail/get_kernel_name_based_data.cpp b/sycl/source/detail/get_device_kernel_info.cpp similarity index 91% rename from sycl/source/detail/get_kernel_name_based_data.cpp rename to sycl/source/detail/get_device_kernel_info.cpp index 5cc54bc253a4..084eeeb60d71 100644 --- a/sycl/source/detail/get_kernel_name_based_data.cpp +++ b/sycl/source/detail/get_device_kernel_info.cpp @@ -1,4 +1,4 @@ -//==-------------------- get_kernel_name_based_data.cpp --------------------==// +//==-------------------- get_device_kernel_info.cpp --------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 7bdd1c489706..ed3bd82fb7e2 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -13,8 +13,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 34a6acd3753b..810679580465 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -10,8 +10,8 @@ #include "sycl/exception.hpp" #include +#include #include -#include #include #include #include diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index c547efa024e8..cc59f12bed5a 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -11,9 +11,9 @@ #include #include #include +#include #include #include -#include #include #include #include From b28f95ecb2c2c1135a59242c92961e4d72d86f10 Mon Sep 17 00:00:00 2001 From: Semenov Date: Fri, 22 Aug 2025 09:44:04 -0700 Subject: [PATCH 19/25] Update Windows ABI dump --- sycl/test/abi/sycl_symbols_windows.dump | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 8d6235784776..09ef387cffe9 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -229,9 +229,9 @@ ??$get_info_impl@Unative_vector_width_int@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_long@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_short@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_args@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ -??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uopencl_c_version@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uparent_device@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Upartition_affinity_domains@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4partition_affinity_domain@info@_V1@sycl@@V?$allocator@W4partition_affinity_domain@info@_V1@sycl@@@std@@@std@@XZ @@ -3841,7 +3841,7 @@ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z ?copyCodeLoc@handler@_V1@sycl@@AEAAXAEBV123@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAUKernelNameBasedCacheT@123@XZ +?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAVKernelNameBasedCacheT@123@XZ ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@PEAX_KAEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4064,6 +4064,7 @@ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?getDeviceBackend@handler@_V1@sycl@@AEBA?AW4backend@23@XZ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z +?getDeviceKernelInfo@detail@_V1@sycl@@YAAEAVDeviceKernelInfo@123@AEBUCompileTimeKernelInfoTy@compile_time_kernel_info_v1@123@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z ?getElemSize@AccessorBaseHost@detail@_V1@sycl@@QEBAIXZ ?getElementSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ @@ -4188,8 +4189,8 @@ ?get_impl@handler@_V1@sycl@@AEAAPEAVhandler_impl@detail@23@XZ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ -?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z @@ -4408,6 +4409,7 @@ ?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z ?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ ?setDevice@HostProfilingInfo@detail@_V1@sycl@@QEAAXPEAVdevice_impl@234@@Z +?setDeviceKernelInfoPtr@handler@_V1@sycl@@AEAAXPEAVDeviceKernelInfo@detail@23@@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z ?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4StableKernelCacheConfig@123@@Z @@ -4417,7 +4419,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z +?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAVKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z @@ -4429,8 +4431,8 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ From 23fa732894d2ede8019cc5338f5a9f0432ae108b Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 22 Aug 2025 12:43:34 -0700 Subject: [PATCH 20/25] Extra asserts plus fixes for them --- sycl/source/detail/device_kernel_info.cpp | 7 ++- sycl/source/detail/device_kernel_info.hpp | 5 +++ .../program_manager/program_manager.cpp | 43 +++++++++++++++++++ sycl/source/handler.cpp | 1 + 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index d8be7e64ce6d..6d3d0d77d07b 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -13,7 +13,12 @@ inline namespace _V1 { namespace detail { DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) - : CompileTimeKernelInfoTy(Info) { + : CompileTimeKernelInfoTy(Info) +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + , + Name(Info.Name) +#endif +{ init(Name); } diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index b204ab6c63eb..46d7807bd429 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -89,6 +89,11 @@ struct FastKernelSubcacheT { // into this structure and get rid of the other KernelName -> * maps. class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // https://github.com/intel/llvm/pull/19117/files#r2294511096 + std::string Name; +#endif + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES DeviceKernelInfo() = default; #endif diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 684954565f9e..f3a1d475b448 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1820,10 +1820,53 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { return {}; } +template +inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, + const OtherTy &RHS) { + // This header states STL includes aren't allowed here, so can't use + // `std::tie(...) == std::tie(...)` idiom, and no C++20 for + // `operator==(...) = default`. + return std::string_view{LHS.Name} == std::string_view{RHS.Name} && + LHS.NumParams == RHS.NumParams && LHS.IsESIMD == RHS.IsESIMD && + std::string_view{LHS.FileName} == std::string_view{RHS.FileName} && + std::string_view{LHS.FunctionName} == + std::string_view{RHS.FunctionName} && + LHS.LineNumber == RHS.LineNumber && + LHS.ColumnNumber == RHS.ColumnNumber && + LHS.KernelSize == RHS.KernelSize && + LHS.ParamDescGetter == RHS.ParamDescGetter && + LHS.HasSpecialCaptures == RHS.HasSpecialCaptures; +} +template +inline constexpr bool operator!=(const CompileTimeKernelInfoTy &LHS, + const OtherTy &RHS) { + return !(LHS == RHS); +} +template inline void print(const InfoTy &Info) { + std::cout << "CompileTimeKernelInfoTy:" + << "\n Name: " << Info.Name << "\n NumParams: " << Info.NumParams + << "\n IsESIMD: " << Info.IsESIMD + << "\n FileName: " << Info.FileName + << "\n FunctionName: " << Info.FunctionName + << "\n LineNumber: " << Info.LineNumber + << "\n ColumnNumber: " << Info.ColumnNumber + << "\n KernelSize: " << Info.KernelSize + << "\n ParamDescGetter: " << Info.ParamDescGetter + << "\n HasSpecialCaptures: " << Info.HasSpecialCaptures + << std::endl; +} DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( const CompileTimeKernelInfoTy &Info) { auto Result = m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name}, Info); + if (Info != Result.first->second) { + std::cout << "Info:" << std::endl; + print(Info); + std::cout << "Result:" << std::endl; + print(Result.first->second); + } + assert(Info == Result.first->second || + Info == CompileTimeKernelInfoTy{Info.Name}); return Result.first->second; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 39396cbf2151..8be25b93d435 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2613,6 +2613,7 @@ void handler::setKernelNameBasedCachePtr( void handler::setDeviceKernelInfoPtr( sycl::detail::DeviceKernelInfo *DeviceKernelInfoPtr) { + assert(!impl->MDeviceKernelInfoPtr && "Already set!"); impl->MDeviceKernelInfoPtr = DeviceKernelInfoPtr; } From 84e07bd2ac2d7261dc5d4e603f99dad0edef49a3 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 25 Aug 2025 08:14:06 -0700 Subject: [PATCH 21/25] Change to detail::string_view and allow lazy init of compile time info --- .../sycl/detail/compile_time_kernel_info.hpp | 51 ++++++++++++++++++ .../sycl/detail/get_device_kernel_info.hpp | 39 ++++++++++++++ sycl/include/sycl/detail/kernel_desc.hpp | 47 ---------------- sycl/include/sycl/handler.hpp | 1 + sycl/source/detail/device_kernel_info.cpp | 32 ++++++++++- sycl/source/detail/device_kernel_info.hpp | 8 ++- .../program_manager/program_manager.cpp | 53 ++++--------------- .../program_manager/program_manager.hpp | 5 +- .../include_deps/sycl_detail_core.hpp.cpp | 2 + 9 files changed, 139 insertions(+), 99 deletions(-) create mode 100644 sycl/include/sycl/detail/compile_time_kernel_info.hpp create mode 100644 sycl/include/sycl/detail/get_device_kernel_info.hpp diff --git a/sycl/include/sycl/detail/compile_time_kernel_info.hpp b/sycl/include/sycl/detail/compile_time_kernel_info.hpp new file mode 100644 index 000000000000..ec648e3288e2 --- /dev/null +++ b/sycl/include/sycl/detail/compile_time_kernel_info.hpp @@ -0,0 +1,51 @@ +//==--------------------- get_device_kernel_info.hpp -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { +inline namespace compile_time_kernel_info_v1 { + +// This is being passed across ABI boundary, so we don't use std::string_view, +// at least for as long as we support user apps built with GNU libstdc++'s +// pre-C++11 ABI. +struct CompileTimeKernelInfoTy { + detail::string_view Name; + unsigned NumParams = 0; + bool IsESIMD = false; + detail::string_view FileName; + detail::string_view FunctionName; + unsigned LineNumber = 0; + unsigned ColumnNumber = 0; + int64_t KernelSize = 0; + using ParamDescGetterT = kernel_param_desc_t (*)(int); + ParamDescGetterT ParamDescGetter = nullptr; + bool HasSpecialCaptures = true; +}; + +template +inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{ + std::string_view(getKernelName()), + getKernelNumParams(), + isKernelESIMD(), + std::string_view(getKernelFileName()), + std::string_view(getKernelFunctionName()), + getKernelLineNumber(), + getKernelColumnNumber(), + getKernelSize(), + &getKernelParamDesc, + hasSpecialCaptures()}; + +} // namespace compile_time_kernel_info_v1 +} // namespace detail +} // namespace _V1 +} // namespace sycl \ No newline at end of file diff --git a/sycl/include/sycl/detail/get_device_kernel_info.hpp b/sycl/include/sycl/detail/get_device_kernel_info.hpp new file mode 100644 index 000000000000..34822f23ad00 --- /dev/null +++ b/sycl/include/sycl/detail/get_device_kernel_info.hpp @@ -0,0 +1,39 @@ +//==--------------------- get_device_kernel_info.hpp -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +class DeviceKernelInfo; +// Lifetime of the underlying `DeviceKernelInfo` is tied to the availability of +// the `sycl_device_binaries` corresponding to this kernel. In other words, once +// user library is unloaded (see __sycl_unregister_lib), program manager destoys +// this `DeviceKernelInfo` object and the reference returned from here becomes +// stale. +__SYCL_EXPORT DeviceKernelInfo & +getDeviceKernelInfo(const CompileTimeKernelInfoTy &); + +template DeviceKernelInfo &getDeviceKernelInfo() { + static DeviceKernelInfo &Info = + getDeviceKernelInfo(CompileTimeKernelInfo); + return Info; +} + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +class KernelNameBasedCacheT; +__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index fed6a4dbff2f..ae01f46c5705 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -277,53 +277,6 @@ template constexpr bool hasSpecialCaptures() { } return FoundSpecialCapture; } -inline namespace compile_time_kernel_info_v1 { - -// This is being passed across ABI boundary, so we don't use std::string_view, -// at least for as long as we support user apps built with GNU libstdc++'s -// pre-C++11 ABI. -struct CompileTimeKernelInfoTy { - const char *const Name = nullptr; - const unsigned NumParams = 0; - const bool IsESIMD = false; - const char *const FileName = ""; - const char *const FunctionName = ""; - const unsigned LineNumber = 0; - const unsigned ColumnNumber = 0; - const int64_t KernelSize = 0; - kernel_param_desc_t (*const ParamDescGetter)(int) = nullptr; - const bool HasSpecialCaptures = false; -}; - -template -inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{ - getKernelName(), getKernelNumParams(), - isKernelESIMD(), getKernelFileName(), - getKernelFunctionName(), getKernelLineNumber(), - getKernelColumnNumber(), getKernelSize(), - &getKernelParamDesc, hasSpecialCaptures()}; -} // namespace compile_time_kernel_info_v1 - -class DeviceKernelInfo; -// Lifetime of the underlying `DeviceKernelInfo` is tied to the availability of -// the `sycl_device_binaries` corresponding to this kernel. In other words, once -// user library is unloaded (see __sycl_unregister_lib), program manager destoys -// this `DeviceKernelInfo` object and the reference returned from here becomes -// stale. -__SYCL_EXPORT DeviceKernelInfo & -getDeviceKernelInfo(const CompileTimeKernelInfoTy &); - -template DeviceKernelInfo &getDeviceKernelInfo() { - static DeviceKernelInfo &Info = - getDeviceKernelInfo(CompileTimeKernelInfo); - return Info; -} - -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES -class KernelNameBasedCacheT; -__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); -#endif - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ee17b2d14ddb..a17358775e81 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 6d3d0d77d07b..371c6d095983 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -16,10 +16,10 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) : CompileTimeKernelInfoTy(Info) #ifndef __INTEL_PREVIEW_BREAKING_CHANGES , - Name(Info.Name) + Name(Info.Name.data()) #endif { - init(Name); + init(Name.data()); } void DeviceKernelInfo::init(KernelNameStrRefT KernelName) { @@ -38,6 +38,32 @@ void DeviceKernelInfo::initIfNeeded(KernelNameStrRefT KernelName) { } #endif +template +inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, + const OtherTy &RHS) { + + // TODO replace with std::tie(...) == std::tie(...) once there is + // implicit conversion from detail to std string_view. + return std::string_view{LHS.Name} == std::string_view{RHS.Name} && + LHS.NumParams == RHS.NumParams && LHS.IsESIMD == RHS.IsESIMD && + std::string_view{LHS.FileName} == std::string_view{RHS.FileName} && + std::string_view{LHS.FunctionName} == + std::string_view{RHS.FunctionName} && + LHS.LineNumber == RHS.LineNumber && + LHS.ColumnNumber == RHS.ColumnNumber && + LHS.KernelSize == RHS.KernelSize && + LHS.ParamDescGetter == RHS.ParamDescGetter && + LHS.HasSpecialCaptures == RHS.HasSpecialCaptures; +} + +void DeviceKernelInfo::setCompileTimeInfoIfNeeded( + const CompileTimeKernelInfoTy &Info) { + if (isCompileTimeInfoSet()) + CompileTimeKernelInfoTy::operator=(Info); + assert(isCompileTimeInfoSet()); + assert(Info == *this); +} + FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() { assertInitialized(); return MFastKernelSubcache; @@ -51,6 +77,8 @@ const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() { return MImplicitLocalArgPos; } +bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; } + void DeviceKernelInfo::assertInitialized() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES assert(MInitialized.load() && "Data needs to be initialized before use"); diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index 46d7807bd429..acf524030b68 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -90,7 +90,8 @@ struct FastKernelSubcacheT { class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - // https://github.com/intel/llvm/pull/19117/files#r2294511096 + // Needs to own the kernel name string in non-preview builds since we pass it + // using a temporary string instead of a string view there. std::string Name; #endif @@ -103,12 +104,15 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES void initIfNeeded(KernelNameStrRefT KernelName); #endif + void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); + FastKernelSubcacheT &getKernelSubcache(); bool usesAssert(); const std::optional &getImplicitLocalArgPos(); private: void assertInitialized(); + bool isCompileTimeInfoSet() const; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES std::atomic MInitialized = false; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index f3a1d475b448..f1a84b92b622 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1820,53 +1820,18 @@ ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { return {}; } -template -inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, - const OtherTy &RHS) { - // This header states STL includes aren't allowed here, so can't use - // `std::tie(...) == std::tie(...)` idiom, and no C++20 for - // `operator==(...) = default`. - return std::string_view{LHS.Name} == std::string_view{RHS.Name} && - LHS.NumParams == RHS.NumParams && LHS.IsESIMD == RHS.IsESIMD && - std::string_view{LHS.FileName} == std::string_view{RHS.FileName} && - std::string_view{LHS.FunctionName} == - std::string_view{RHS.FunctionName} && - LHS.LineNumber == RHS.LineNumber && - LHS.ColumnNumber == RHS.ColumnNumber && - LHS.KernelSize == RHS.KernelSize && - LHS.ParamDescGetter == RHS.ParamDescGetter && - LHS.HasSpecialCaptures == RHS.HasSpecialCaptures; -} -template -inline constexpr bool operator!=(const CompileTimeKernelInfoTy &LHS, - const OtherTy &RHS) { - return !(LHS == RHS); -} -template inline void print(const InfoTy &Info) { - std::cout << "CompileTimeKernelInfoTy:" - << "\n Name: " << Info.Name << "\n NumParams: " << Info.NumParams - << "\n IsESIMD: " << Info.IsESIMD - << "\n FileName: " << Info.FileName - << "\n FunctionName: " << Info.FunctionName - << "\n LineNumber: " << Info.LineNumber - << "\n ColumnNumber: " << Info.ColumnNumber - << "\n KernelSize: " << Info.KernelSize - << "\n ParamDescGetter: " << Info.ParamDescGetter - << "\n HasSpecialCaptures: " << Info.HasSpecialCaptures - << std::endl; -} DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( const CompileTimeKernelInfoTy &Info) { auto Result = - m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name}, Info); - if (Info != Result.first->second) { - std::cout << "Info:" << std::endl; - print(Info); - std::cout << "Result:" << std::endl; - print(Result.first->second); - } - assert(Info == Result.first->second || - Info == CompileTimeKernelInfoTy{Info.Name}); + m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info); + Result.first->second.setCompileTimeInfoIfNeeded(Info); + return Result.first->second; +} + +DeviceKernelInfo & +ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { + auto Result = m_DeviceKernelInfoMap.try_emplace( + KernelName, CompileTimeKernelInfoTy{std::string_view(KernelName)}); return Result.first->second; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index cc59f12bed5a..63a1a61b55c6 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -378,10 +378,7 @@ class ProgramManager { DeviceKernelInfo & getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); - DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { - return getOrCreateDeviceKernelInfo( - CompileTimeKernelInfoTy{KernelName.data()}); - } + DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index e03d3eae08c0..9a0791c5e913 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -130,6 +130,8 @@ // CHECK-NEXT: CL/cl_version.h // CHECK-NEXT: CL/cl_platform.h // CHECK-NEXT: CL/cl_ext.h +// CHECK-NEXT: detail/get_device_kernel_info.hpp +// CHECK-NEXT: detail/compile_time_kernel_info.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp From a1cd6c85bd15165eed0e794968372886e065d82e Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 26 Aug 2025 06:47:43 -0700 Subject: [PATCH 22/25] Minor fixes --- sycl/include/sycl/detail/compile_time_kernel_info.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/compile_time_kernel_info.hpp b/sycl/include/sycl/detail/compile_time_kernel_info.hpp index ec648e3288e2..f2eb59e874cd 100644 --- a/sycl/include/sycl/detail/compile_time_kernel_info.hpp +++ b/sycl/include/sycl/detail/compile_time_kernel_info.hpp @@ -1,4 +1,4 @@ -//==--------------------- get_device_kernel_info.hpp -----------------------==// +//==------------------- compile_time_kernel_info.hpp -----------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,8 +22,8 @@ struct CompileTimeKernelInfoTy { detail::string_view Name; unsigned NumParams = 0; bool IsESIMD = false; - detail::string_view FileName; - detail::string_view FunctionName; + detail::string_view FileName{}; + detail::string_view FunctionName{}; unsigned LineNumber = 0; unsigned ColumnNumber = 0; int64_t KernelSize = 0; @@ -48,4 +48,4 @@ inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{ } // namespace compile_time_kernel_info_v1 } // namespace detail } // namespace _V1 -} // namespace sycl \ No newline at end of file +} // namespace sycl From 37f68c81f68780a60d3d3fd43ebdaa6dbd2b5fdd Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 26 Aug 2025 12:24:28 -0700 Subject: [PATCH 23/25] Fix Windows ABI break --- sycl/include/sycl/detail/get_device_kernel_info.hpp | 2 +- sycl/source/detail/global_handler.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/detail/get_device_kernel_info.hpp b/sycl/include/sycl/detail/get_device_kernel_info.hpp index 34822f23ad00..021f4077b9a3 100644 --- a/sycl/include/sycl/detail/get_device_kernel_info.hpp +++ b/sycl/include/sycl/detail/get_device_kernel_info.hpp @@ -30,7 +30,7 @@ template DeviceKernelInfo &getDeviceKernelInfo() { } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES -class KernelNameBasedCacheT; +struct KernelNameBasedCacheT; __SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); #endif diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 9f03b929e4cc..ec7bf7da48b6 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -30,7 +30,7 @@ class ods_target_list; class XPTIRegistry; class ThreadPool; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES -class KernelNameBasedCacheT; +struct KernelNameBasedCacheT; class DeviceKernelInfo; #endif From 223f28d0d8d7428ca7386551b50137821860d8f9 Mon Sep 17 00:00:00 2001 From: Semenov Date: Tue, 26 Aug 2025 13:47:31 -0700 Subject: [PATCH 24/25] Update Windows ABI dump --- sycl/test/abi/sycl_symbols_windows.dump | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 09ef387cffe9..eae017c88eac 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3841,7 +3841,7 @@ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z ?copyCodeLoc@handler@_V1@sycl@@AEAAXAEBV123@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAVKernelNameBasedCacheT@123@XZ +?createKernelNameBasedCache@detail@_V1@sycl@@YAPEAUKernelNameBasedCacheT@123@XZ ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@PEAX_KAEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4419,7 +4419,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAVKernelNameBasedCacheT@detail@23@@Z +?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$00@23@0V?$id@$00@23@@Z From 10409637a3d48d49f568160ba297b59cffa239ce Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 27 Aug 2025 09:41:46 -0700 Subject: [PATCH 25/25] Drop extra line --- sycl/source/detail/device_kernel_info.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 371c6d095983..30f2db1ec40b 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -41,7 +41,6 @@ void DeviceKernelInfo::initIfNeeded(KernelNameStrRefT KernelName) { template inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, const OtherTy &RHS) { - // TODO replace with std::tie(...) == std::tie(...) once there is // implicit conversion from detail to std string_view. return std::string_view{LHS.Name} == std::string_view{RHS.Name} &&