Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
7a259f0
[SYCL] Refactor kernel name based cache approach
sergey-semenov Aug 19, 2025
2ae5678
Merge 7a259f088541aa25acd1bd2b7a10f195d945445b into ea192fd860d67499f…
sergey-semenov Aug 19, 2025
7fde713
Update Windows ABI
sergey-semenov Aug 19, 2025
b38a467
Assert data presence in CG
sergey-semenov Aug 19, 2025
8e41e7a
Misc changes
sergey-semenov Aug 19, 2025
272bc61
Extend cleanup tests coverage
sergey-semenov Aug 19, 2025
bdb5b28
Merge branch 'sycl' into refactorcache
sergey-semenov Aug 20, 2025
57553b7
Add `CompileTimeKernelInfoTy` and unimplemented `getDeviceKernelInfo`
aelovikov-intel Aug 21, 2025
abd48ae
Rename KernelNameBasedData -> DeviceKernelInfo
aelovikov-intel Aug 21, 2025
a403873
Revert ABI dumps
aelovikov-intel Aug 21, 2025
f96055b
Switch to `getDeviceKernelInfo<Kernel>()`
aelovikov-intel Aug 21, 2025
2908935
Update program_manager's signature for getOrCreateDeviceKernelInfo
aelovikov-intel Aug 21, 2025
cceffaf
Drop `sycl/detail/get_kernel_name_based_data.hpp`
aelovikov-intel Aug 21, 2025
7fa2519
class DeviceKernelInfo : public CompileTimeKernelInfoTy
aelovikov-intel Aug 21, 2025
ed389b6
Drop ABINeutralKernelNameStrRefT
aelovikov-intel Aug 21, 2025
66f4928
Adjust unit tests
sergey-semenov Aug 22, 2025
5fc91a5
Merge branch 'sycl' into refactorcache
sergey-semenov Aug 22, 2025
fa50f3f
Complete the renaming
sergey-semenov Aug 22, 2025
c2444de
Update Linux ABI dump
sergey-semenov Aug 22, 2025
87543f5
Appease clang-format
sergey-semenov Aug 22, 2025
0351125
Rename files
sergey-semenov Aug 22, 2025
b28f95e
Update Windows ABI dump
sergey-semenov Aug 22, 2025
23fa732
Extra asserts plus fixes for them
aelovikov-intel Aug 22, 2025
84e07bd
Change to detail::string_view and allow lazy init of compile time info
sergey-semenov Aug 25, 2025
a1cd6c8
Minor fixes
sergey-semenov Aug 26, 2025
2a51c3a
Merge branch 'sycl' into refactorcache
sergey-semenov Aug 26, 2025
737e745
Merge branch 'sycl' into refactorcache
sergey-semenov Aug 26, 2025
37f68c8
Fix Windows ABI break
sergey-semenov Aug 26, 2025
223f28d
Update Windows ABI dump
sergey-semenov Aug 26, 2025
eb7b242
Merge branch 'sycl' into refactorcache
sergey-semenov Aug 27, 2025
1040963
Drop extra line
sergey-semenov Aug 27, 2025
e950dfd
Merge branch 'sycl' into refactorcache
sergey-semenov Aug 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions sycl/include/sycl/detail/compile_time_kernel_info.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//==------------------- compile_time_kernel_info.hpp -----------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <sycl/detail/kernel_desc.hpp>
#include <sycl/detail/string_view.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {
inline namespace compile_time_kernel_info_v1 {

// This is being passed across ABI boundary, so we don't use std::string_view,
// at least for as long as we support user apps built with GNU libstdc++'s
// pre-C++11 ABI.
struct CompileTimeKernelInfoTy {
detail::string_view Name;
unsigned NumParams = 0;
bool IsESIMD = false;
detail::string_view FileName{};
detail::string_view FunctionName{};
unsigned LineNumber = 0;
unsigned ColumnNumber = 0;
int64_t KernelSize = 0;
using ParamDescGetterT = kernel_param_desc_t (*)(int);
ParamDescGetterT ParamDescGetter = nullptr;
bool HasSpecialCaptures = true;
};

template <class Kernel>
inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{
std::string_view(getKernelName<Kernel>()),
getKernelNumParams<Kernel>(),
isKernelESIMD<Kernel>(),
std::string_view(getKernelFileName<Kernel>()),
std::string_view(getKernelFunctionName<Kernel>()),
getKernelLineNumber<Kernel>(),
getKernelColumnNumber<Kernel>(),
getKernelSize<Kernel>(),
&getKernelParamDesc<Kernel>,
hasSpecialCaptures<Kernel>()};

} // namespace compile_time_kernel_info_v1
} // namespace detail
} // namespace _V1
} // namespace sycl
39 changes: 39 additions & 0 deletions sycl/include/sycl/detail/get_device_kernel_info.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//==--------------------- get_device_kernel_info.hpp -----------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <sycl/detail/compile_time_kernel_info.hpp>
#include <sycl/detail/kernel_desc.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

class DeviceKernelInfo;
// Lifetime of the underlying `DeviceKernelInfo` is tied to the availability of
// the `sycl_device_binaries` corresponding to this kernel. In other words, once
// user library is unloaded (see __sycl_unregister_lib), program manager destoys
// this `DeviceKernelInfo` object and the reference returned from here becomes
// stale.
__SYCL_EXPORT DeviceKernelInfo &
getDeviceKernelInfo(const CompileTimeKernelInfoTy &);

template <class Kernel> DeviceKernelInfo &getDeviceKernelInfo() {
static DeviceKernelInfo &Info =
getDeviceKernelInfo(CompileTimeKernelInfo<Kernel>);
return Info;
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
struct KernelNameBasedCacheT;
__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache();
#endif

} // namespace detail
} // namespace _V1
} // namespace sycl
1 change: 0 additions & 1 deletion sycl/include/sycl/detail/kernel_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ template <typename KernelNameType> constexpr bool hasSpecialCaptures() {
}
return FoundSpecialCapture;
}

} // namespace detail
} // namespace _V1
} // namespace sycl
29 changes: 0 additions & 29 deletions sycl/include/sycl/detail/kernel_name_based_cache.hpp

This file was deleted.

7 changes: 5 additions & 2 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
#include <sycl/detail/common.hpp>
#include <sycl/detail/defines_elementary.hpp>
#include <sycl/detail/export.hpp>
#include <sycl/detail/get_device_kernel_info.hpp>
#include <sycl/detail/id_queries_fit_in_int.hpp>
#include <sycl/detail/impl_utils.hpp>
#include <sycl/detail/kernel_desc.hpp>
#include <sycl/detail/kernel_launch_helper.hpp>
#include <sycl/detail/kernel_name_based_cache.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>
#include <sycl/detail/reduction_forward.hpp>
#include <sycl/detail/string.hpp>
Expand Down Expand Up @@ -863,14 +863,14 @@ class __SYCL_EXPORT handler {
constexpr std::string_view KernelNameStr =
detail::getKernelName<KernelName>();
MKernelName = KernelNameStr;
setDeviceKernelInfoPtr(&detail::getDeviceKernelInfo<KernelName>());
} else {
// In case w/o the integration header it is necessary to process
// accessors from the list(which are associated with this handler) as
// arguments. We must copy the associated accessors as they are checked
// later during finalize.
setArgsToAssociatedAccessors();
}
setKernelNameBasedCachePtr(detail::getKernelNameBasedCache<KernelName>());

// If the kernel lambda is callable with a kernel_handler argument, manifest
// the associated kernel handler.
Expand Down Expand Up @@ -3685,8 +3685,11 @@ class __SYCL_EXPORT handler {
sycl::handler &h, size_t size,
const ext::oneapi::experimental::memory_pool &pool);

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
void setKernelNameBasedCachePtr(
detail::KernelNameBasedCacheT *KernelNameBasedCachePtr);
#endif
void setDeviceKernelInfoPtr(detail::DeviceKernelInfo *DeviceKernelInfoPtr);

queue getQueue();

Expand Down
3 changes: 2 additions & 1 deletion sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ set(SYCL_COMMON_SOURCES
"detail/kernel_compiler/kernel_compiler_opencl.cpp"
"detail/kernel_compiler/kernel_compiler_sycl.cpp"
"detail/kernel_impl.cpp"
"detail/kernel_name_based_cache.cpp"
"detail/get_device_kernel_info.cpp"
"detail/device_kernel_info.cpp"
"detail/kernel_program_cache.cpp"
"detail/memory_export.cpp"
"detail/memory_manager.cpp"
Expand Down
8 changes: 3 additions & 5 deletions sycl/source/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ class CGExecKernel : public CG {
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
std::vector<ArgDesc> MArgs;
KernelNameStrT MKernelName;
KernelNameBasedCacheT *MKernelNameBasedCachePtr;
DeviceKernelInfo &MDeviceKernelInfo;
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
/// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list
Expand All @@ -269,8 +269,7 @@ class CGExecKernel : public CG {
std::shared_ptr<detail::kernel_impl> SyclKernel,
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
KernelNameStrT KernelName,
KernelNameBasedCacheT *KernelNameBasedCachePtr,
KernelNameStrT KernelName, DeviceKernelInfo &DeviceKernelInfo,
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
CGType Type, ur_kernel_cache_config_t KernelCacheConfig,
Expand All @@ -279,8 +278,7 @@ class CGExecKernel : public CG {
: CG(Type, std::move(CGData), std::move(loc)), MNDRDesc(NDRDesc),
MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)),
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
MKernelName(std::move(KernelName)),
MKernelNameBasedCachePtr(KernelNameBasedCachePtr),
MKernelName(std::move(KernelName)), MDeviceKernelInfo(DeviceKernelInfo),
MStreams(std::move(Streams)),
MAuxiliaryResources(std::move(AuxiliaryResources)),
MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)),
Expand Down
90 changes: 90 additions & 0 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//==---------------------- device_kernel_info.cpp ----------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <detail/device_kernel_info.hpp>
#include <detail/program_manager/program_manager.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
: CompileTimeKernelInfoTy(Info)
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
,
Name(Info.Name.data())
#endif
{
init(Name.data());
}

void DeviceKernelInfo::init(KernelNameStrRefT KernelName) {
auto &PM = detail::ProgramManager::getInstance();
MUsesAssert = PM.kernelUsesAssert(KernelName);
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
MInitialized.store(true);
#endif
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
void DeviceKernelInfo::initIfNeeded(KernelNameStrRefT KernelName) {
if (!MInitialized.load())
init(KernelName);
}
#endif

template <typename OtherTy>
inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS,
const OtherTy &RHS) {

// TODO replace with std::tie(...) == std::tie(...) once there is
// implicit conversion from detail to std string_view.
return std::string_view{LHS.Name} == std::string_view{RHS.Name} &&
LHS.NumParams == RHS.NumParams && LHS.IsESIMD == RHS.IsESIMD &&
std::string_view{LHS.FileName} == std::string_view{RHS.FileName} &&
std::string_view{LHS.FunctionName} ==
std::string_view{RHS.FunctionName} &&
LHS.LineNumber == RHS.LineNumber &&
LHS.ColumnNumber == RHS.ColumnNumber &&
LHS.KernelSize == RHS.KernelSize &&
LHS.ParamDescGetter == RHS.ParamDescGetter &&
LHS.HasSpecialCaptures == RHS.HasSpecialCaptures;
}

void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
const CompileTimeKernelInfoTy &Info) {
if (isCompileTimeInfoSet())
CompileTimeKernelInfoTy::operator=(Info);
assert(isCompileTimeInfoSet());
Comment on lines +60 to +62
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sergey-semenov , what did you mean by those lines?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, this should be if(!isCompileTimeInfoSet())

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Why isn't it failing anywhere?
  2. When I tried the change (although on top of my other changes), things broke for me. I'm not sure if it'd be clean on trunk either. Will you follow up please?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why isn't it failing anywhere?

That probably indicates lack of test coverage for this scenario. If that's true, I don't think it's worth adding since my next PR will essentially make all kernel info behave like this (i.e. runtime information will be filled out during image registration, compile time information will be filled out during the first access to device kernel info with compile time information available).

When I tried the change (although on top of my other changes), things broke for me. I'm not sure if it'd be clean on trunk either. Will you follow up please?

I'll look into it, let's see if #20003 hits any failures in precommit.

assert(Info == *this);
}

FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}
bool DeviceKernelInfo::usesAssert() {
assertInitialized();
return MUsesAssert;
}
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
assertInitialized();
return MImplicitLocalArgPos;
}

bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }

void DeviceKernelInfo::assertInitialized() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
}

} // namespace detail
} // namespace _V1
} // namespace sycl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//==-------------------- kernel_name_based_cache_t.hpp ---------------------==//
//==---------------------- device_kernel_info.hpp ----------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand All @@ -10,6 +10,8 @@
#include <detail/hashers.hpp>
#include <detail/kernel_arg_mask.hpp>
#include <emhash/hash_table8.hpp>
#include <sycl/detail/compile_time_kernel_info.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>
#include <sycl/detail/spinlock.hpp>
#include <sycl/detail/ur.hpp>

Expand All @@ -23,9 +25,9 @@ using FastKernelCacheKeyT = std::pair<ur_device_handle_t, ur_context_handle_t>;

struct FastKernelCacheVal {
Managed<ur_kernel_handle_t> MKernelHandle; /* UR kernel. */
std::mutex *MMutex; /* Mutex guarding this kernel. When
caching is disabled, the pointer is
nullptr. */
std::mutex *MMutex; /* Mutex guarding this kernel. When
caching is disabled, the pointer is
nullptr. */
const KernelArgMask *MKernelArgMask; /* Eliminated kernel argument mask. */
Managed<ur_program_handle_t> MProgramHandle; /* UR program handle
corresponding to this kernel. */
Expand Down Expand Up @@ -71,18 +73,53 @@ struct FastKernelEntryT {

using FastKernelSubcacheEntriesT = std::vector<FastKernelEntryT>;

// Structure for caching built kernels with a specific name.
// Used by instances of the kernel program cache class (potentially multiple).
struct FastKernelSubcacheT {
FastKernelSubcacheEntriesT Entries;
FastKernelSubcacheMutexT Mutex;
};

struct KernelNameBasedCacheT {
FastKernelSubcacheT FastKernelSubcache;
std::optional<bool> UsesAssert;
// Implicit local argument position is represented by an optional int, this
// uses another optional on top of that to represent lazy initialization of
// the cached value.
std::optional<std::optional<int>> ImplicitLocalArgPos;
// This class aggregates information specific to device kernels (i.e.
// information that is uniform between different submissions of the same
// kernel). Pointers to instances of this class are stored in header function
// templates as a static variable to avoid repeated runtime lookup overhead.
// TODO Currently this class duplicates information fetched from the program
// manager. Instead, we should merge all of this information
// into this structure and get rid of the other KernelName -> * maps.
class DeviceKernelInfo : public CompileTimeKernelInfoTy {
public:
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Needs to own the kernel name string in non-preview builds since we pass it
// using a temporary string instead of a string view there.
std::string Name;
#endif

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
DeviceKernelInfo() = default;
#endif
DeviceKernelInfo(const CompileTimeKernelInfoTy &Info);

void init(KernelNameStrRefT KernelName);
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
void initIfNeeded(KernelNameStrRefT KernelName);
#endif
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);

FastKernelSubcacheT &getKernelSubcache();
bool usesAssert();
const std::optional<int> &getImplicitLocalArgPos();

private:
void assertInitialized();
bool isCompileTimeInfoSet() const;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
std::atomic<bool> MInitialized = false;
#endif
FastKernelSubcacheT MFastKernelSubcache;
bool MUsesAssert;
std::optional<int> MImplicitLocalArgPos;
};

} // namespace detail
Expand Down
Loading
Loading