Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,9 @@ namespace sycl {
inline namespace _V1 {
namespace detail {

DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
: CompileTimeKernelInfoTy(Info) {
init(Name.data());
}

void DeviceKernelInfo::init(std::string_view KernelName) {
auto &PM = detail::ProgramManager::getInstance();
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
DeviceKernelInfo::DeviceKernelInfo(std::string_view Name,
std::optional<int> ImplicitLocalArgPos)
: CompileTimeKernelInfoTy{Name}, MImplicitLocalArgPos{ImplicitLocalArgPos} {
}

template <typename OtherTy>
Expand Down Expand Up @@ -49,7 +44,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
assert(isCompileTimeInfoSet());
assert(Info == *this);
}

} // namespace detail
} // namespace _V1
} // namespace sycl
8 changes: 4 additions & 4 deletions sycl/source/detail/device_kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,22 +89,22 @@ struct FastKernelSubcacheT {
// into this structure and get rid of the other KernelName -> * maps.
class DeviceKernelInfo : public CompileTimeKernelInfoTy {
public:
DeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo(std::string_view Name,
std::optional<int> ImplicitLocalArgPos = {});

void init(std::string_view KernelName);
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);

FastKernelSubcacheT &getKernelSubcache() { return MFastKernelSubcache; }

std::optional<int> getImplicitLocalArgPos() const {
const std::optional<int> &getImplicitLocalArgPos() const {
return MImplicitLocalArgPos;
}

private:
bool isCompileTimeInfoSet() const { return KernelSize != 0; }

FastKernelSubcacheT MFastKernelSubcache;
std::optional<int> MImplicitLocalArgPos;
const std::optional<int> MImplicitLocalArgPos;
};

} // namespace detail
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/get_device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ inline namespace _V1 {
namespace detail {

DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info);
return ProgramManager::getInstance().getDeviceKernelInfo(Info);
}

} // namespace detail
Expand Down
26 changes: 16 additions & 10 deletions sycl/source/detail/kernel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ namespace sycl {
inline namespace _V1 {
namespace detail {

static CompileTimeKernelInfoTy
createCompileTimeKernelInfo(std::string_view KernelName = {}) {
return CompileTimeKernelInfoTy{KernelName};
}

kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
context_impl &Context,
kernel_bundle_impl *KernelBundleImpl,
Expand All @@ -31,8 +26,8 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
MCreatedFromSource(true),
MKernelBundleImpl(KernelBundleImpl ? KernelBundleImpl->shared_from_this()
: nullptr),
MIsInterop(true), MKernelArgMaskPtr{ArgMask},
MInteropDeviceKernelInfo(createCompileTimeKernelInfo(getName())) {
MIsInterop(true), MKernelArgMaskPtr{ArgMask}, MOwnsDeviceKernelInfo(true),
MDeviceKernelInfo(getName()) {
ur_context_handle_t UrContext = nullptr;
// Using the adapter from the passed ContextImpl
getAdapter().call<UrApiKind::urKernelGetInfo>(
Expand All @@ -59,9 +54,10 @@ kernel_impl::kernel_impl(Managed<ur_kernel_handle_t> &&Kernel,
MKernelBundleImpl(KernelBundleImpl.shared_from_this()),
MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop),
MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex},
MInteropDeviceKernelInfo(MIsInterop
? createCompileTimeKernelInfo(getName())
: createCompileTimeKernelInfo()) {
MOwnsDeviceKernelInfo(checkOwnsDeviceKernelInfo()),
MDeviceKernelInfo(MOwnsDeviceKernelInfo ? getName()
: std::string_view()) {

// Enable USM indirect access for interop and non-sycl-jit source kernels.
// sycl-jit kernels will enable this if needed through the regular kernel
// path.
Expand Down Expand Up @@ -121,6 +117,16 @@ std::string_view kernel_impl::getName() const {
return MName;
}

// Decides whether this kernel keeps its own DeviceKernelInfo instead of
// referring to an entry stored in the ProgramManager.
bool kernel_impl::checkOwnsDeviceKernelInfo() {
  const auto OriginMask = MDeviceImageImpl->getOriginMask();

  // The image comes solely from standard offline compilation: the kernel does
  // not own its info structure.
  if (OriginMask == ImageOriginSYCLOffline)
    return false;

  // No offline-compiled origin at all: the kernel needs to own its info
  // structure.
  if (!(OriginMask & ImageOriginSYCLOffline))
    return true;

  // Mixed-origin image: the device kernel info might reside in the program
  // manager, so own it only when the lookup by name finds nothing.
  return !ProgramManager::getInstance().tryGetDeviceKernelInfo(getName());
}

bool kernel_impl::isBuiltInKernel(device_impl &Device) const {
auto BuiltInKernels = Device.get_info<info::device::built_in_kernel_ids>();
if (BuiltInKernels.empty())
Expand Down
13 changes: 8 additions & 5 deletions sycl/source/detail/kernel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,11 @@ class kernel_impl {
std::mutex *getCacheMutex() const { return MCacheMutex; }
std::string_view getName() const;

bool checkOwnsDeviceKernelInfo();
DeviceKernelInfo &getDeviceKernelInfo() {
return MIsInterop
? MInteropDeviceKernelInfo
: ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
return MOwnsDeviceKernelInfo
? MDeviceKernelInfo
: ProgramManager::getInstance().getDeviceKernelInfo(
Comment on lines +244 to +246
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just have a reference/pointer as the member (potentially pointing to the owning smart pointer member) instead of doing those lookups?

Copy link
Contributor Author

@sergey-semenov sergey-semenov Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we should. I was planning that as a follow-up change since the lookup is already there right now.

std::string_view(getName()));
}

Expand All @@ -259,9 +260,11 @@ class kernel_impl {
std::mutex *MCacheMutex = nullptr;
mutable std::string MName;

// It is used for the interop kernels only.
// Used for images that aren't obtained with standard SYCL offline
// compilation.
// For regular kernels we get DeviceKernelInfo from the ProgramManager.
DeviceKernelInfo MInteropDeviceKernelInfo;
bool MOwnsDeviceKernelInfo = false;
DeviceKernelInfo MDeviceKernelInfo;

bool isBuiltInKernel(device_impl &Device) const;
void checkIfValidForNumArgsInfoQuery() const;
Expand Down
59 changes: 35 additions & 24 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1788,33 +1788,28 @@ Managed<ur_program_handle_t> ProgramManager::build(
return LinkedProg;
}

void ProgramManager::cacheKernelImplicitLocalArg(
const RTDeviceBinaryImage &Img) {
const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange =
Img.getImplicitLocalArg();
if (ImplicitLocalArgRange.isAvailable())
for (auto Prop : ImplicitLocalArgRange) {
m_KernelImplicitLocalArgPos[Prop->Name] =
DeviceBinaryProperty(Prop).asUint32();
}
DeviceKernelInfo &
ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
auto It = m_DeviceKernelInfoMap.find(Info.Name);
assert(It != m_DeviceKernelInfoMap.end());
It->second.setCompileTimeInfoIfNeeded(Info);
return It->second;
}

DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
const CompileTimeKernelInfoTy &Info) {
DeviceKernelInfo &
ProgramManager::getDeviceKernelInfo(std::string_view KernelName) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
auto [Iter, Inserted] = m_DeviceKernelInfoMap.try_emplace(Info.Name, Info);
if (!Inserted)
Iter->second.setCompileTimeInfoIfNeeded(Info);
return Iter->second;
auto It = m_DeviceKernelInfoMap.find(KernelName);
assert(It != m_DeviceKernelInfoMap.end());
return It->second;
}

DeviceKernelInfo &
ProgramManager::getOrCreateDeviceKernelInfo(std::string_view KernelName) {
DeviceKernelInfo *
ProgramManager::tryGetDeviceKernelInfo(std::string_view KernelName) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
CompileTimeKernelInfoTy DefaultCompileTimeInfo{KernelName};
auto Result =
m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo);
return Result.first->second;
auto It = m_DeviceKernelInfoMap.find(KernelName);
return It != m_DeviceKernelInfoMap.end() ? &It->second : nullptr;
}

static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg,
Expand Down Expand Up @@ -1993,6 +1988,16 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
m_BinImg2KernelIDs[Img.get()];
KernelIDs.reset(new std::vector<kernel_id>);

// Collect the implicit local argument position of every kernel in this image
// that carries the property, keyed by kernel name, so the per-entry loop
// below can look it up by name.
std::unordered_map<std::string_view, int> ImplicitLocalArgPositions;
const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange =
    Img->getImplicitLocalArg();
if (ImplicitLocalArgRange.isAvailable()) {
  for (auto Prop : ImplicitLocalArgRange) {
    // Result is only inspected by the assertion; [[maybe_unused]] keeps
    // builds with NDEBUG defined free of unused-variable warnings.
    [[maybe_unused]] auto Result = ImplicitLocalArgPositions.try_emplace(
        Prop->Name, DeviceBinaryProperty(Prop).asUint32());
    assert(Result.second && "Duplicate implicit arg property");
  }
}

for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
EntriesIt = EntriesIt->Increment()) {

Expand All @@ -2017,6 +2022,15 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
KernelIDs->push_back(It->second);

std::optional<int> ImplicitLocalArgPos;
auto ImplicitLocalArgPosIt = ImplicitLocalArgPositions.find(name);
if (ImplicitLocalArgPosIt != ImplicitLocalArgPositions.end())
ImplicitLocalArgPos = ImplicitLocalArgPosIt->second;
auto Result =
m_DeviceKernelInfoMap.try_emplace(name, name, ImplicitLocalArgPos);
assert(ImplicitLocalArgPos ==
Result.first->second.getImplicitLocalArgPos() &&
"Conflicting values of implicit local arg positions");
// Keep track of image to kernel name reference count for cleanup.
m_KernelNameRefCount[name]++;
}
Expand All @@ -2037,8 +2051,6 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
}
}

cacheKernelImplicitLocalArg(*Img);

// Sort kernel ids for faster search
std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash<kernel_id>{});

Expand Down Expand Up @@ -2208,7 +2220,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
if (--RefCount == 0) {
// TODO aggregate all these maps into a single one since their entries
// share lifetime.
m_KernelImplicitLocalArgPos.erase(Name);
m_DeviceKernelInfoMap.erase(Name);
m_KernelNameRefCount.erase(RefCountIt);
if (Name2IDIt != m_KernelName2KernelIDs.end())
Expand Down
23 changes: 3 additions & 20 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,17 +393,9 @@ class ProgramManager {

SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }

std::optional<int>
kernelImplicitLocalArgPos(std::string_view KernelName) const {
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
if (it != m_KernelImplicitLocalArgPos.end())
return it->second;
return {};
}

DeviceKernelInfo &
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo &getOrCreateDeviceKernelInfo(std::string_view KernelName);
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
DeviceKernelInfo &getDeviceKernelInfo(std::string_view KernelName);
DeviceKernelInfo *tryGetDeviceKernelInfo(std::string_view KernelName);

std::set<const RTDeviceBinaryImage *>
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
Expand Down Expand Up @@ -432,9 +424,6 @@ class ProgramManager {
/// Dumps image to current directory
void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;

/// Add info on kernels using local arg into cache
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);

std::set<const RTDeviceBinaryImage *>
collectDependentDeviceImagesForVirtualFunctions(
const RTDeviceBinaryImage &Img, const device_impl &Dev);
Expand Down Expand Up @@ -541,12 +530,6 @@ class ProgramManager {
bool m_UseSpvFile = false;
RTDeviceBinaryImageUPtr m_SpvFileImage;

// std::less<> is a transparent comparator that enabled comparison between
// different types without temporary key_type object creation. This includes
// standard overloads, such as comparison between std::string and
// std::string_view or just char*.
std::unordered_map<std::string_view, int> m_KernelImplicitLocalArgPos;

// Map for storing device kernel information. Runtime lookup should be avoided
// by caching the pointers when possible.
std::unordered_map<std::string_view, DeviceKernelInfo> m_DeviceKernelInfoMap;
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2452,7 +2452,7 @@ static ur_result_t SetKernelParamsAndLaunch(
applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc);
}

std::optional<int> ImplicitLocalArg =
const std::optional<int> &ImplicitLocalArg =
DeviceKernelInfo.getImplicitLocalArgPos();
// Set the implicit local memory buffer to support
// get_work_group_scratch_memory. This is for backend not supporting
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ detail::EventImplPtr handler::finalize() {
// Fetch the device kernel info pointer if it hasn't been set (e.g.
// in kernel bundle or free function cases).
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
std::string_view(MKernelName)));
}
assert(impl->MKernelData.getKernelName() == MKernelName);
Expand Down Expand Up @@ -862,7 +862,7 @@ void handler::extractArgsAndReqs() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
if (impl->MKernelData.getDeviceKernelInfoPtr() == nullptr) {
impl->MKernelData.setDeviceKernelInfoPtr(
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
std::string_view(MKernel->getName())));
}
#endif
Expand Down
17 changes: 11 additions & 6 deletions sycl/test-e2e/Config/kernel_from_file.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
// REQUIRES: target-spir

// FIXME Disabled fallback assert as it'll require either online linking or
// explicit offline linking step here
// FIXME separate compilation requires -fno-sycl-dead-args-optimization
// As we are doing a separate device compilation here, we need to explicitly
// add the device lib instrumentation (itt_compiler_wrapper)
// RUN: %clangxx -Wno-error=ignored-attributes -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 -fsycl-device-only -fno-sycl-dead-args-optimization -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// RUN: %clangxx -Wno-error=ignored-attributes -DUSED_KERNEL -fno-sycl-dead-args-optimization %cxx_std_optionc++17 -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -o %t.bc -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// >> ---- unbundle compiler wrapper and asan device objects
// RUN: clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-itt-compiler-wrappers%obj_ext -output=%t_compiler_wrappers.bc -unbundle
// RUN: %if linux %{ clang-offload-bundler -type=o -targets=sycl-spir64-unknown-unknown -input=%sycl_static_libs_dir/libsycl-asan%obj_ext -output=%t_asan.bc -unbundle %}
// >> ---- link device code
// RUN: %if linux %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %t_asan.bc %} %else %{ llvm-link -o=%t_app.bc %t.bc %t_compiler_wrappers.bc %}
// >> ---- translate to SPIR-V
// RUN: llvm-spirv -o %t.spv %t_app.bc
// RUN: %clangxx -Wno-error=ignored-attributes %sycl_include -DSYCL_DISABLE_FALLBACK_ASSERT %cxx_std_optionc++17 %include_option %t.h %s -o %t.out %sycl_options -Xclang -verify-ignore-unexpected=note,warning %if preview-mode %{-Wno-unused-command-line-argument%}
// Need to perform full compilation here since the SYCL runtime uses image
// properties from the fat binary.
// RUN: %{build} -fno-sycl-dead-args-optimization -o %t.out
// RUN: env SYCL_USE_KERNEL_SPV=%t.spv %{run} %t.out

#include <iostream>
Expand All @@ -31,10 +31,15 @@ int main(int argc, char **argv) {
event e = myQueue.submit([&](handler &cgh) {
auto ptr = buf.get_access<access::mode::read_write>(cgh);

cgh.single_task<class my_kernel>([=]() { ptr[0]++; });
cgh.single_task<class my_kernel>([=]() {
#ifdef USED_KERNEL
ptr[0]++;
#else
ptr[0]--;
#endif
});
});
e.wait_and_throw();

} catch (sycl::exception const &e) {
std::cerr << "SYCL exception caught:\n";
std::cerr << e.what() << "\n";
Expand Down
6 changes: 6 additions & 0 deletions sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ class Kernel3;
MOCK_INTEGRATION_HEADER(Kernel1)
MOCK_INTEGRATION_HEADER(Kernel2)
MOCK_INTEGRATION_HEADER(Kernel3)
// Mock device images for the kernels declared above: one default image per
// kernel name, wrapped in a MockDeviceImageArray.
// NOTE(review): presumably this makes Kernel1..Kernel3 known to the runtime
// so that per-kernel info lookups succeed in these tests — confirm against
// MockDeviceImageArray.
static sycl::unittest::MockDeviceImage CommandGraphImgs[3] = {
sycl::unittest::generateDefaultImage({"Kernel1"}),
sycl::unittest::generateDefaultImage({"Kernel2"}),
sycl::unittest::generateDefaultImage({"Kernel3"})};
static sycl::unittest::MockDeviceImageArray<3> CommandGraphImgArray{
CommandGraphImgs};

using namespace sycl;
using namespace sycl::ext::oneapi;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ using namespace sycl::ext::oneapi;

class MockKernel;
MOCK_INTEGRATION_HEADER(MockKernel)

// Mock device image for MockKernel, wrapped in a single-element
// MockDeviceImageArray.
// NOTE(review): presumably this makes MockKernel known to the runtime so that
// per-kernel info lookups succeed in these tests — confirm against
// MockDeviceImageArray.
static sycl::unittest::MockDeviceImage MockKernelImg =
sycl::unittest::generateDefaultImage({"MockKernel"});
static sycl::unittest::MockDeviceImageArray<1> MockKernelImgArray{
&MockKernelImg};
/**
* Checks that the operators and constructors of graph related classes meet the
* common reference semantics.
Expand Down
Loading
Loading