Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,22 +263,6 @@ context_impl::get_backend_info<info::device::backend_version>() const {
}
#endif

ur_context_handle_t &context_impl::getHandleRef() { return MContext; }
const ur_context_handle_t &context_impl::getHandleRef() const {
return MContext;
}

KernelProgramCache &context_impl::getKernelProgramCache() const {
return MKernelProgramCache;
}

bool context_impl::hasDevice(const detail::device_impl &Device) const {
for (device_impl *D : MDevices)
if (D == &Device)
return true;
return false;
}

device_impl *
context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const {
for (device_impl *D : MDevices)
Expand Down
13 changes: 9 additions & 4 deletions sycl/source/detail/context_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
/// reference will be invalid if context_impl was destroyed.
///
/// \return an instance of raw UR context handle.
ur_context_handle_t &getHandleRef();
ur_context_handle_t &getHandleRef() { return MContext; }

/// Gets the underlying context object (if any) without reference count
/// modification.
Expand All @@ -126,7 +126,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
/// reference will be invalid if context_impl was destroyed.
///
/// \return an instance of raw UR context handle.
const ur_context_handle_t &getHandleRef() const;
const ur_context_handle_t &getHandleRef() const { return MContext; }

devices_range getDevices() const { return MDevices; }

Expand All @@ -151,10 +151,15 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
return {MCachedLibPrograms, MCachedLibProgramsMutex};
}

KernelProgramCache &getKernelProgramCache() const;
KernelProgramCache &getKernelProgramCache() const {
return MKernelProgramCache;
}

/// Returns true if and only if context contains the given device.
bool hasDevice(const detail::device_impl &Device) const;
bool hasDevice(const detail::device_impl &Device) const {
return std::any_of(MDevices.begin(), MDevices.end(),
[&](auto *D) { return D == &Device; });
}

/// Returns true if and only if the device can be used within this context.
/// For OpenCL this is currently equivalent to hasDevice, for other backends
Expand Down
18 changes: 0 additions & 18 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,24 +73,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
assert(Info == *this);
}

FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}

const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
assertInitialized();
return MImplicitLocalArgPos;
}

bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }

void DeviceKernelInfo::assertInitialized() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
}

} // namespace detail
} // namespace _V1
} // namespace sycl
19 changes: 15 additions & 4 deletions sycl/source/detail/device_kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,23 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
#endif
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);

FastKernelSubcacheT &getKernelSubcache();
const std::optional<int> &getImplicitLocalArgPos();
FastKernelSubcacheT &getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}

std::optional<int> getImplicitLocalArgPos() const {
assertInitialized();
return MImplicitLocalArgPos;
}

private:
void assertInitialized();
bool isCompileTimeInfoSet() const;
void assertInitialized() const {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
}
bool isCompileTimeInfoSet() const { return KernelSize != 0; }

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
std::atomic<bool> MInitialized = false;
Expand Down
55 changes: 21 additions & 34 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
#endif
}

GlobalHandler *&GlobalHandler::getInstancePtr() {
static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
return RTGlobalObjHandler;
}

GlobalHandler &GlobalHandler::instance() {
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
return *RTGlobalObjHandler;
}

bool GlobalHandler::isInstanceAlive() {
return GlobalHandler::getInstancePtr();
}
GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if this was meant to be NFC changes, but technically this changes when the first object is created. Previously it would only be created if getInstancePtr was called, now it happens at the time of loading the library. I don't think that's necessarily a problem, but so far it is the only functional change I have noticed in this PR and technically it makes users pay for the instance even if they never use the global handler (implicitly). That said, it is hard to think of such a case.

Copy link
Contributor Author

@lslusarczyk lslusarczyk Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is on purpose to avoid slower access to static variables declared inside a method as we discussed in this comment of original PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't feel strongly against doing this, but I wonder if it would be cleaner to do it in a separate PR and mark this "[NFCI]". That way, if there are unforeseen issues with doing the initialization earlier, it should hopefully be easier to isolate the cause when bisecting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated summary to not miss this change when doing "git log" later. Can we leave as it? All CI already passed.
Hopefully work of extracting this non-NFC change will not be have to be paid at all. Risk is tiny I think. In case of problems we will pay - I will revert, extract & reapply part.


template <typename T, typename... Types>
T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types &&...Args) {
Expand Down Expand Up @@ -331,8 +318,7 @@ void GlobalHandler::drainThreadPool() {
// 2) when process is being terminated
void shutdown_early(bool CanJoinThreads = true) {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
if (!Handler)
if (!GlobalHandler::RTGlobalObjHandler)
return;

#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
Expand All @@ -342,26 +328,26 @@ void shutdown_early(bool CanJoinThreads = true) {
#endif

// Now that we are shutting down, we will no longer defer MemObj releases.
Handler->endDeferredRelease();
GlobalHandler::RTGlobalObjHandler->endDeferredRelease();

// Ensure neither host task is working so that no default context is accessed
// upon its release
Handler->prepareSchedulerToRelease(true);
GlobalHandler::RTGlobalObjHandler->prepareSchedulerToRelease(true);

if (Handler->MHostTaskThreadPool.Inst) {
Handler->MHostTaskThreadPool.Inst->finishAndWait(CanJoinThreads);
Handler->MHostTaskThreadPool.Inst.reset(nullptr);
if (GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst) {
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst->finishAndWait(
CanJoinThreads);
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst.reset(nullptr);
}

// This releases OUR reference to the default context, but
// other may yet have refs
Handler->releaseDefaultContexts();
GlobalHandler::RTGlobalObjHandler->releaseDefaultContexts();
}

void shutdown_late() {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
if (!Handler)
if (!GlobalHandler::RTGlobalObjHandler)
return;

#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
Expand All @@ -371,26 +357,27 @@ void shutdown_late() {
#endif

// First, release resources, that may access adapters.
Handler->MPlatformCache.Inst.reset(nullptr);
Handler->MScheduler.Inst.reset(nullptr);
Handler->MProgramManager.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MPlatformCache.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MScheduler.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MProgramManager.Inst.reset(nullptr);

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Kernel cache, which is part of device kernel info,
// stores handles to the adapter, so clear it before releasing adapters.
Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MDeviceKernelInfoStorage.Inst.reset(
nullptr);
#endif

// Clear the adapters and reset the instance if it was there.
Handler->unloadAdapters();
if (Handler->MAdapters.Inst)
Handler->MAdapters.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->unloadAdapters();
if (GlobalHandler::RTGlobalObjHandler->MAdapters.Inst)
GlobalHandler::RTGlobalObjHandler->MAdapters.Inst.reset(nullptr);

Handler->MXPTIRegistry.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MXPTIRegistry.Inst.reset(nullptr);

// Release the rest of global resources.
delete Handler;
Handler = nullptr;
delete GlobalHandler::RTGlobalObjHandler;
GlobalHandler::RTGlobalObjHandler = nullptr;
}

#ifdef _WIN32
Expand Down
25 changes: 12 additions & 13 deletions sycl/source/detail/global_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,11 @@ class DeviceKernelInfo;
/// construction or destruction is generated anyway.
class GlobalHandler {
public:
/// \return a reference to a GlobalHandler singleton instance. Memory for
/// storing objects is allocated on first call. The reference is valid as long
/// as runtime library is loaded (i.e. untill `DllMain` or
static bool isInstanceAlive() { return RTGlobalObjHandler != nullptr; }
/// \return a reference to a GlobalHandler singleton instance. The reference
/// is valid as long as runtime library is loaded (i.e. untill `DllMain` or
/// `__attribute__((destructor))` is called).
static GlobalHandler &instance();

/// \return true if the instance has not been deallocated yet.
static bool isInstanceAlive();
static GlobalHandler &instance() { return *RTGlobalObjHandler; }

GlobalHandler(const GlobalHandler &) = delete;
GlobalHandler(GlobalHandler &&) = delete;
Expand Down Expand Up @@ -96,19 +93,18 @@ class GlobalHandler {
void attachScheduler(Scheduler *Scheduler);

private:
// Constructor and destructor are declared out-of-line to allow incomplete
// types as template arguments to unique_ptr.
GlobalHandler();
~GlobalHandler();

bool OkToDefer = true;

friend void shutdown_early(bool);
friend void shutdown_late();
friend class ObjectUsageCounter;
static GlobalHandler *&getInstancePtr();
static SpinLock MSyclGlobalHandlerProtector;

// Constructor and destructor are declared out-of-line to allow incomplete
// types as template arguments to unique_ptr.
GlobalHandler();
~GlobalHandler();

template <typename T> struct InstWithLock {
std::unique_ptr<T> Inst;
SpinLock Lock;
Expand All @@ -135,7 +131,10 @@ class GlobalHandler {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
InstWithLock<std::deque<DeviceKernelInfo>> MDeviceKernelInfoStorage;
#endif

static GlobalHandler *RTGlobalObjHandler;
};

} // namespace detail
} // namespace _V1
} // namespace sycl
30 changes: 0 additions & 30 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,6 @@ static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog,
}
}

ProgramManager &ProgramManager::getInstance() {
return GlobalHandler::instance().getProgramManager();
}

static Managed<ur_program_handle_t>
createBinaryProgram(context_impl &Context, devices_range Devices,
const uint8_t **Binaries, size_t *Lengths,
Expand Down Expand Up @@ -1805,14 +1801,6 @@ void ProgramManager::cacheKernelImplicitLocalArg(
}
}

std::optional<int>
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
if (it != m_KernelImplicitLocalArgPos.end())
return it->second;
return {};
}

DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
const CompileTimeKernelInfoTy &Info) {
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
Expand Down Expand Up @@ -2344,24 +2332,6 @@ ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) {
: sycl::bundle_state::object;
}

std::optional<kernel_id>
ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) {
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

auto KernelID = m_KernelName2KernelIDs.find(KernelName);
if (KernelID == m_KernelName2KernelIDs.end())
return std::nullopt;

return KernelID->second;
}

kernel_id ProgramManager::getSYCLKernelID(KernelNameStrRefT KernelName) {
if (std::optional<kernel_id> MaybeKernelID = tryGetSYCLKernelID(KernelName))
return *MaybeKernelID;
throw exception(make_error_code(errc::runtime),
"No kernel found with the specified name");
}

bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) {
std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);

Expand Down
29 changes: 25 additions & 4 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ class ProgramManager {
public:
// Returns the single instance of the program manager for the entire
// process. Can only be called after staticInit is done.
static ProgramManager &getInstance();
static ProgramManager &getInstance() {
return GlobalHandler::instance().getProgramManager();
}

const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName,
context_impl &ContextImpl,
Expand Down Expand Up @@ -236,11 +238,24 @@ class ProgramManager {

// The function returns the unique SYCL kernel identifier associated with a
// kernel name or nullopt if there is no such ID.
std::optional<kernel_id> tryGetSYCLKernelID(KernelNameStrRefT KernelName);
std::optional<kernel_id> tryGetSYCLKernelID(KernelNameStrRefT KernelName) {
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

auto KernelID = m_KernelName2KernelIDs.find(KernelName);
if (KernelID == m_KernelName2KernelIDs.end())
return std::nullopt;

return KernelID->second;
}

// The function returns the unique SYCL kernel identifier associated with a
// kernel name or throws a sycl exception if there is no such ID.
kernel_id getSYCLKernelID(KernelNameStrRefT KernelName);
kernel_id getSYCLKernelID(KernelNameStrRefT KernelName) {
if (std::optional<kernel_id> MaybeKernelID = tryGetSYCLKernelID(KernelName))
return *MaybeKernelID;
throw exception(make_error_code(errc::runtime),
"No kernel found with the specified name");
}

// The function returns a vector containing all unique SYCL kernel identifiers
// in SYCL device images.
Expand Down Expand Up @@ -375,7 +390,12 @@ class ProgramManager {
SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }

std::optional<int>
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const;
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
if (it != m_KernelImplicitLocalArgPos.end())
return it->second;
return {};
}

DeviceKernelInfo &
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
Expand Down Expand Up @@ -562,6 +582,7 @@ class ProgramManager {

friend class ::ProgramManagerTest;
};

} // namespace detail
} // namespace _V1
} // namespace sycl
Loading