Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 26 additions & 13 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,10 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
Name(Info.Name.data())
#endif
{
init(Name.data());
}

void DeviceKernelInfo::init(KernelNameStrRefT KernelName) {
auto &PM = detail::ProgramManager::getInstance();
MUsesAssert = PM.kernelUsesAssert(KernelName);
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Non-legacy implementation either fills out the data during image
// registration after this constructor is called, or uses default values
// if this instance of DeviceKernelInfo corresponds to an interop kernel.
MInitialized.store(true);
#endif
}
Expand All @@ -36,9 +32,19 @@ void DeviceKernelInfo::initIfEmpty(const CompileTimeKernelInfoTy &Info) {
if (MInitialized.load())
return;

CompileTimeKernelInfoTy::operator=(Info);
Name = Info.Name.data();
init(Name.data());
// If this function is called, then this is a default initialized
// device kernel info created from older headers and stored in global handler.
// In that case, fetch the proper instance from program manager and copy its
// values.
auto &PM = detail::ProgramManager::getInstance();
DeviceKernelInfo &PMDeviceKernelInfo =
PM.getDeviceKernelInfo(KernelNameStrRefT(Info.Name));

PMDeviceKernelInfo.CompileTimeKernelInfoTy::operator=(Info);
PMDeviceKernelInfo.Name = Info.Name.data();

MUsesAssert = PMDeviceKernelInfo.MUsesAssert;
MImplicitLocalArgPos = PMDeviceKernelInfo.MImplicitLocalArgPos;
}
#endif

Expand Down Expand Up @@ -78,18 +84,25 @@ FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}
bool DeviceKernelInfo::usesAssert() {
bool DeviceKernelInfo::usesAssert() const {
assertInitialized();
return MUsesAssert;
}
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() const {
assertInitialized();
return MImplicitLocalArgPos;
}

// Records that this kernel uses assert by setting MUsesAssert to true. There is
// no way to clear the flag through this setter.
void DeviceKernelInfo::setUsesAssert() { MUsesAssert = true; }

void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) {
  // The position may be set repeatedly, but only ever to the same value: an
  // empty optional falls back to Pos itself, so the check passes trivially
  // the first time and compares against the stored value afterwards.
  assert(MImplicitLocalArgPos.value_or(Pos) == Pos);
  MImplicitLocalArgPos.emplace(Pos);
}

// Reports whether compile-time kernel info is present, using a non-zero
// KernelSize as the indicator.
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }

void DeviceKernelInfo::assertInitialized() {
void DeviceKernelInfo::assertInitialized() const {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
Expand Down
9 changes: 6 additions & 3 deletions sycl/source/detail/device_kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);

FastKernelSubcacheT &getKernelSubcache();
bool usesAssert();
const std::optional<int> &getImplicitLocalArgPos();
bool usesAssert() const;
const std::optional<int> &getImplicitLocalArgPos() const;

void setUsesAssert();
void setImplicitLocalArgPos(int Pos);

private:
void assertInitialized();
void assertInitialized() const;
bool isCompileTimeInfoSet() const;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/get_device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() {
#endif

DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info);
return ProgramManager::getInstance().getDeviceKernelInfo(Info);
}

} // namespace detail
Expand Down
23 changes: 18 additions & 5 deletions sycl/source/detail/kernel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
MCreatedFromSource(true),
MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this()
: nullptr),
MIsInterop(true), MKernelArgMaskPtr{ArgMask},
MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true),
MDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
ur_context_handle_t UrContext = nullptr;
// Using the adapter from the passed ContextImpl
getAdapter().call<UrApiKind::urKernelGetInfo>(
Expand All @@ -59,9 +59,11 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
MKernelBundleImpl(KernelBundleImpl.shared_from_this()),
MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop),
MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex},
MInteropDeviceKernelInfo(MIsInterop
? createCompileTimeKernelInfo(getName())
: createCompileTimeKernelInfo()) {
MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()),
MDeviceKernelInfo(MOwnsDeviceKernelInfo
? createCompileTimeKernelInfo(getName())
: createCompileTimeKernelInfo()) {

// Enable USM indirect access for interop and non-sycl-jit source kernels.
// sycl-jit kernels will enable this if needed through the regular kernel
// path.
Expand Down Expand Up @@ -121,6 +123,17 @@ std::string_view kernel_impl::getName() const {
return MName;
}

bool kernel_impl::checkOwnsDeviceKernelInfo() {
  // Decides whether this kernel must keep its own DeviceKernelInfo instead of
  // using the instance stored in the program manager.
  const auto OriginMask = MDeviceImageImpl->getOriginMask();

  // Image produced exclusively by standard offline compilation: the info
  // structure is not owned by this kernel.
  if (OriginMask == ImageOriginSYCLOffline)
    return false;

  // No offline-compiled origin at all: this kernel needs to own its info
  // structure.
  if (!(OriginMask & ImageOriginSYCLOffline))
    return true;

  // Mixed origin image: the device kernel info might reside in the program
  // manager, so own it only when the lookup by kernel name finds nothing.
  return !ProgramManager::getInstance().tryGetDeviceKernelInfo(
      static_cast<KernelNameStrT>(getName()));
}

bool kernel_impl::isBuiltInKernel(device_impl &Device) const {
auto BuiltInKernels = Device.get_info<info::device::built_in_kernel_ids>();
if (BuiltInKernels.empty())
Expand Down
13 changes: 8 additions & 5 deletions sycl/source/detail/kernel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,11 @@ class kernel_impl {
std::mutex *getCacheMutex() const { return MCacheMutex; }
std::string_view getName() const;

bool checkOwnsDeviceKernelInfo();
DeviceKernelInfo &getDeviceKernelInfo() {
return MIsInterop
? MInteropDeviceKernelInfo
: ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
return MOwnsDeviceKernelInfo
? MDeviceKernelInfo
: ProgramManager::getInstance().getDeviceKernelInfo(
Comment on lines +244 to +246
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just have a reference/pointer as the member (potentially pointing to the owning smart pointer member) instead of doing those lookups?

Copy link
Contributor Author

@sergey-semenov sergey-semenov Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we should. I was planning that as a follow-up change since the lookup is already there right now.

KernelNameStrT(getName()));
}

Expand All @@ -259,9 +260,11 @@ class kernel_impl {
std::mutex *MCacheMutex = nullptr;
mutable std::string MName;

// It is used for the interop kernels only.
// Used for images that aren't obtained with standard SYCL offline
// compilation.
// For regular kernel we get DeviceKernelInfo from the ProgramManager.
DeviceKernelInfo MInteropDeviceKernelInfo;
bool MOwnsDeviceKernelInfo = false;
DeviceKernelInfo MDeviceKernelInfo;

bool isBuiltInKernel(device_impl &Device) const;
void checkIfValidForNumArgsInfoQuery() const;
Expand Down
53 changes: 28 additions & 25 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1797,8 +1797,11 @@ void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) {
const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
Img.getAssertUsed();
if (AssertUsedRange.isAvailable())
for (const auto &Prop : AssertUsedRange)
m_KernelUsesAssert.insert(Prop->Name);
for (const auto &Prop : AssertUsedRange) {
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
assert(It != m_DeviceKernelInfoMap.end());
It->second.setUsesAssert();
}
}

void ProgramManager::cacheKernelImplicitLocalArg(
Expand All @@ -1807,36 +1810,34 @@ void ProgramManager::cacheKernelImplicitLocalArg(
Img.getImplicitLocalArg();
if (ImplicitLocalArgRange.isAvailable())
for (auto Prop : ImplicitLocalArgRange) {
m_KernelImplicitLocalArgPos[Prop->Name] =
DeviceBinaryProperty(Prop).asUint32();
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
assert(It != m_DeviceKernelInfoMap.end());
It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32());
}
}

std::optional<int>
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
if (it != m_KernelImplicitLocalArgPos.end())
return it->second;
return {};
DeviceKernelInfo &
ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
  // Looks up the existing DeviceKernelInfo entry for Info.Name, lets it pick
  // up the compile-time info if needed, and returns a reference to it. The map
  // is accessed under m_DeviceKernelInfoMapMutex.
  std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);

  const auto Entry =
      m_DeviceKernelInfoMap.find(KernelNameStrT{Info.Name.data()});
  // The entry is expected to exist already; this function never inserts.
  // NOTE(review): the check is assert-only, so with NDEBUG a missing entry
  // leads to dereferencing end().
  assert(Entry != m_DeviceKernelInfoMap.end());

  DeviceKernelInfo &KernelInfo = Entry->second;
  KernelInfo.setCompileTimeInfoIfNeeded(Info);
  return KernelInfo;
}

DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
const CompileTimeKernelInfoTy &Info) {
DeviceKernelInfo &
ProgramManager::getDeviceKernelInfo(KernelNameStrRefT KernelName) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
auto [Iter, Inserted] =
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info);
if (!Inserted)
Iter->second.setCompileTimeInfoIfNeeded(Info);
return Iter->second;
auto It = m_DeviceKernelInfoMap.find(KernelName);
assert(It != m_DeviceKernelInfoMap.end());
return It->second;
}

DeviceKernelInfo &
ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) {
DeviceKernelInfo *
ProgramManager::tryGetDeviceKernelInfo(KernelNameStrRefT KernelName) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(KernelName)};
auto Result =
m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo);
return Result.first->second;
auto It = m_DeviceKernelInfoMap.find(KernelName);
return It != m_DeviceKernelInfoMap.end() ? &It->second : nullptr;
}

static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg,
Expand Down Expand Up @@ -2039,6 +2040,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
KernelIDs->push_back(It->second);

CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)};
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT(name),
DefaultCompileTimeInfo);

// Keep track of image to kernel name reference count for cleanup.
m_KernelNameRefCount[name]++;
}
Expand Down Expand Up @@ -2232,8 +2237,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
if (--RefCount == 0) {
// TODO aggregate all these maps into a single one since their entries
// share lifetime.
m_KernelUsesAssert.erase(Name);
m_KernelImplicitLocalArgPos.erase(Name);
m_DeviceKernelInfoMap.erase(Name);
m_KernelNameRefCount.erase(RefCountIt);
if (Name2IDIt != m_KernelName2KernelIDs.end())
Expand Down
29 changes: 5 additions & 24 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,19 +365,14 @@ class ProgramManager {
ProgramManager();
~ProgramManager() = default;

template <typename NameT>
bool kernelUsesAssert(const NameT &KernelName) const {
return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
}

SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }

std::optional<int>
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const;
void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);

DeviceKernelInfo &
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName);
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo &getDeviceKernelInfo(KernelNameStrRefT KernelName);
DeviceKernelInfo *tryGetDeviceKernelInfo(KernelNameStrRefT KernelName);

std::set<const RTDeviceBinaryImage *>
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
Expand Down Expand Up @@ -406,12 +401,6 @@ class ProgramManager {
/// Dumps image to current directory
void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;

/// Add info on kernels using assert into cache
void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);

/// Add info on kernels using local arg into cache
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);

std::set<const RTDeviceBinaryImage *>
collectDependentDeviceImagesForVirtualFunctions(
const RTDeviceBinaryImage &Img, const device_impl &Dev);
Expand Down Expand Up @@ -518,14 +507,6 @@ class ProgramManager {
bool m_UseSpvFile = false;
RTDeviceBinaryImageUPtr m_SpvFileImage;

// std::less<> is a transparent comparator that enables comparison between
// different types without temporary key_type object creation. This includes
// standard overloads, such as comparison between std::string and
// std::string_view or just char*.
using KernelUsesAssertSet = std::set<KernelNameStrT, std::less<>>;
KernelUsesAssertSet m_KernelUsesAssert;
std::unordered_map<KernelNameStrT, int> m_KernelImplicitLocalArgPos;

// Map for storing device kernel information. Runtime lookup should be avoided
// by caching the pointers when possible.
std::unordered_map<KernelNameStrT, DeviceKernelInfo> m_DeviceKernelInfoMap;
Expand Down
7 changes: 3 additions & 4 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ event handler::finalize() {
// Fetch the device kernel info pointer if it hasn't been set (e.g.
// in kernel bundle or free function cases).
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
toKernelNameStrT(MKernelName)));
}
assert(impl->MKernelData.getKernelName() == MKernelName);
Expand Down Expand Up @@ -974,7 +974,7 @@ void handler::extractArgsAndReqs() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
if (impl->MKernelData.getDeviceKernelInfoPtr() == nullptr) {
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
detail::toKernelNameStrT(MKernel->getName())));
}
#endif
Expand Down Expand Up @@ -2249,8 +2249,7 @@ void handler::setKernelNameBasedCachePtr(
HandlerInfo.IsESIMD = impl->MKernelIsESIMD;
HandlerInfo.HasSpecialCaptures = impl->MKernelHasSpecialCaptures;
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
HandlerInfo));
&detail::ProgramManager::getInstance().getDeviceKernelInfo(HandlerInfo));
}

void handler::setKernelInfo(
Expand Down
17 changes: 11 additions & 6 deletions sycl/test-e2e/Config/kernel_from_file.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
// REQUIRES: target-spir

// FIXME Disabled fallback assert as it'll require either online linking or
// explicit offline linking step here
// FIXME separate compilation requires -fno-sycl-dead-args-optimization
// As we are doing a separate device compilation here, we need to explicitly
// add the device lib instrumentation (itt_compiler_wrapper)
// RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// RUN: %clangxx -Wno-error=ignored-attributes -DUSED_KERNEL -fno-sycl-dead-args-optimization %cxx_std_optionc++17 -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// >> ---- unbundle compiler wrapper and asan device objects
// RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle
// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %}
// >> ---- link device code
// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %}
// >> ---- translate to SPIR-V
// RUN: llvm-spirv -o %t.spv %t_app.bc
// RUN: %clangxx -Wno-error=ignored-attributes %sycl_include -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -Xclang -verify-ignore-unexpected=note,warning %if preview-mode %{-Wno-unused-command-line-argument%}
// Need to perform full compilation here since the SYCL runtime uses image
// properties from the fat binary.
// RUN: %{build} -fno-sycl-dead-args-optimization -o %t.out
// RUN: env SYCL_USE_KERNEL_SPV=%t.spv %{run} %t.out

#include <iostream>
Expand All @@ -31,10 +31,15 @@ int main(int argc, char **argv) {
event e = myQueue.submit([&](handler &cgh) {
auto ptr = buf.get_access<access::mode::read_write>(cgh);

cgh.single_task<class my_kernel>([=]() { ptr[0]++; });
cgh.single_task<class my_kernel>([=]() {
#ifdef USED_KERNEL
ptr[0]++;
#else
ptr[0]--;
#endif
});
});
e.wait_and_throw();

} catch (sycl::exception const &e) {
std::cerr << "SYCL exception caught:\n";
std::cerr << e.what() << "\n";
Expand Down
6 changes: 6 additions & 0 deletions sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ class Kernel3;
MOCK_INTEGRATION_HEADER(Kernel1)
MOCK_INTEGRATION_HEADER(Kernel2)
MOCK_INTEGRATION_HEADER(Kernel3)
// Mock device images, one per kernel name used by these tests (Kernel1..3),
// wrapped in a MockDeviceImageArray.
// NOTE(review): presumably these are needed so that the program manager has
// DeviceKernelInfo entries for the kernels before they are looked up — confirm
// against MockDeviceImageArray's registration behavior.
static sycl::unittest::MockDeviceImage CommandGraphImgs[3] = {
sycl::unittest::generateDefaultImage({"Kernel1"}),
sycl::unittest::generateDefaultImage({"Kernel2"}),
sycl::unittest::generateDefaultImage({"Kernel3"})};
static sycl::unittest::MockDeviceImageArray<3> CommandGraphImgArray{
CommandGraphImgs};

using namespace sycl;
using namespace sycl::ext::oneapi;
Expand Down
Loading
Loading