diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 6fb2dd375fe37..026d1289e05ca 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -263,22 +263,6 @@ context_impl::get_backend_info() const { } #endif -ur_context_handle_t &context_impl::getHandleRef() { return MContext; } -const ur_context_handle_t &context_impl::getHandleRef() const { - return MContext; -} - -KernelProgramCache &context_impl::getKernelProgramCache() const { - return MKernelProgramCache; -} - -bool context_impl::hasDevice(const detail::device_impl &Device) const { - for (device_impl *D : MDevices) - if (D == &Device) - return true; - return false; -} - device_impl * context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const { for (device_impl *D : MDevices) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 0ae3df8dcf397..3b42a6319d223 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this { /// reference will be invalid if context_impl was destroyed. /// /// \return an instance of raw UR context handle. - ur_context_handle_t &getHandleRef(); + ur_context_handle_t &getHandleRef() { return MContext; } /// Gets the underlying context object (if any) without reference count /// modification. @@ -126,7 +126,7 @@ class context_impl : public std::enable_shared_from_this { /// reference will be invalid if context_impl was destroyed. /// /// \return an instance of raw UR context handle. - const ur_context_handle_t &getHandleRef() const; + const ur_context_handle_t &getHandleRef() const { return MContext; } devices_range getDevices() const { return MDevices; } @@ -151,10 +151,15 @@ class context_impl : public std::enable_shared_from_this { return {MCachedLibPrograms, MCachedLibProgramsMutex}; } - KernelProgramCache &getKernelProgramCache() const; + KernelProgramCache &getKernelProgramCache() const { + return MKernelProgramCache; + } /// Returns true if and only if context contains the given device. - bool hasDevice(const detail::device_impl &Device) const; + bool hasDevice(const detail::device_impl &Device) const { + return std::any_of(MDevices.begin(), MDevices.end(), + [&](auto *D) { return D == &Device; }); + } /// Returns true if and only if the device can be used within this context. /// For OpenCL this is currently equivalent to hasDevice, for other backends diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index 5c6dfad0d633d..790a7c9cbaafa 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -73,24 +73,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded( assert(Info == *this); } -FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() { - assertInitialized(); - return MFastKernelSubcache; -} - -const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() { - assertInitialized(); - return MImplicitLocalArgPos; -} - -bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; } - -void DeviceKernelInfo::assertInitialized() { -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES - assert(MInitialized.load() && "Data needs to be initialized before use"); -#endif -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index c76db34a3227d..a91420ab5bc13 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -107,12 +107,23 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { #endif void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); - FastKernelSubcacheT &getKernelSubcache(); - const std::optional &getImplicitLocalArgPos(); + FastKernelSubcacheT &getKernelSubcache() { + assertInitialized(); + return MFastKernelSubcache; + } + + std::optional getImplicitLocalArgPos() const { + assertInitialized(); + return MImplicitLocalArgPos; + } private: - void assertInitialized(); - bool isCompileTimeInfoSet() const; + void assertInitialized() const { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + assert(MInitialized.load() && "Data needs to be initialized before use"); +#endif + } + bool isCompileTimeInfoSet() const { return KernelSize != 0; } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES std::atomic MInitialized = false; diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index eb7d11d3b29d4..97e67523cff86 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) { #endif } -GlobalHandler *&GlobalHandler::getInstancePtr() { - static GlobalHandler *RTGlobalObjHandler = new GlobalHandler(); - return RTGlobalObjHandler; -} - -GlobalHandler &GlobalHandler::instance() { - GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - assert(RTGlobalObjHandler && "Handler must not be deallocated earlier"); - return *RTGlobalObjHandler; -} - -bool GlobalHandler::isInstanceAlive() { - return GlobalHandler::getInstancePtr(); -} +GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler(); template T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types &&...Args) { @@ -331,8 +318,7 @@ void GlobalHandler::drainThreadPool() { // 2) when process is being terminated void shutdown_early(bool CanJoinThreads = true) { const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; - GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); - if (!Handler) + if (!GlobalHandler::RTGlobalObjHandler) return; #if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32) @@ -342,26 +328,26 @@ void shutdown_early(bool CanJoinThreads = true) { #endif // Now that we are shutting down, we will no longer defer MemObj releases. - Handler->endDeferredRelease(); + GlobalHandler::RTGlobalObjHandler->endDeferredRelease(); // Ensure neither host task is working so that no default context is accessed // upon its release - Handler->prepareSchedulerToRelease(true); + GlobalHandler::RTGlobalObjHandler->prepareSchedulerToRelease(true); - if (Handler->MHostTaskThreadPool.Inst) { - Handler->MHostTaskThreadPool.Inst->finishAndWait(CanJoinThreads); - Handler->MHostTaskThreadPool.Inst.reset(nullptr); + if (GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst) { + GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst->finishAndWait( + CanJoinThreads); + GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst.reset(nullptr); } // This releases OUR reference to the default context, but // other may yet have refs - Handler->releaseDefaultContexts(); + GlobalHandler::RTGlobalObjHandler->releaseDefaultContexts(); } void shutdown_late() { const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; - GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); - if (!Handler) + if (!GlobalHandler::RTGlobalObjHandler) return; #if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32) @@ -371,26 +357,27 @@ void shutdown_late() { #endif // First, release resources, that may access adapters. - Handler->MPlatformCache.Inst.reset(nullptr); - Handler->MScheduler.Inst.reset(nullptr); - Handler->MProgramManager.Inst.reset(nullptr); + GlobalHandler::RTGlobalObjHandler->MPlatformCache.Inst.reset(nullptr); + GlobalHandler::RTGlobalObjHandler->MScheduler.Inst.reset(nullptr); + GlobalHandler::RTGlobalObjHandler->MProgramManager.Inst.reset(nullptr); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Kernel cache, which is part of device kernel info, // stores handles to the adapter, so clear it before releasing adapters. - Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr); + GlobalHandler::RTGlobalObjHandler->MDeviceKernelInfoStorage.Inst.reset( + nullptr); #endif // Clear the adapters and reset the instance if it was there. - Handler->unloadAdapters(); - if (Handler->MAdapters.Inst) - Handler->MAdapters.Inst.reset(nullptr); + GlobalHandler::RTGlobalObjHandler->unloadAdapters(); + if (GlobalHandler::RTGlobalObjHandler->MAdapters.Inst) + GlobalHandler::RTGlobalObjHandler->MAdapters.Inst.reset(nullptr); - Handler->MXPTIRegistry.Inst.reset(nullptr); + GlobalHandler::RTGlobalObjHandler->MXPTIRegistry.Inst.reset(nullptr); // Release the rest of global resources. - delete Handler; - Handler = nullptr; + delete GlobalHandler::RTGlobalObjHandler; + GlobalHandler::RTGlobalObjHandler = nullptr; } #ifdef _WIN32 diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index ec7bf7da48b6a..5b0a01d8cef7d 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -48,14 +48,11 @@ class DeviceKernelInfo; /// construction or destruction is generated anyway. class GlobalHandler { public: - /// \return a reference to a GlobalHandler singleton instance. Memory for - /// storing objects is allocated on first call. The reference is valid as long - /// as runtime library is loaded (i.e. untill `DllMain` or + static bool isInstanceAlive() { return RTGlobalObjHandler != nullptr; } + /// \return a reference to a GlobalHandler singleton instance. The reference + /// is valid as long as runtime library is loaded (i.e. untill `DllMain` or /// `__attribute__((destructor))` is called). - static GlobalHandler &instance(); - - /// \return true if the instance has not been deallocated yet. - static bool isInstanceAlive(); + static GlobalHandler &instance() { return *RTGlobalObjHandler; } GlobalHandler(const GlobalHandler &) = delete; GlobalHandler(GlobalHandler &&) = delete; @@ -96,19 +93,18 @@ class GlobalHandler { void attachScheduler(Scheduler *Scheduler); private: + // Constructor and destructor are declared out-of-line to allow incomplete + // types as template arguments to unique_ptr. + GlobalHandler(); + ~GlobalHandler(); + bool OkToDefer = true; friend void shutdown_early(bool); friend void shutdown_late(); friend class ObjectUsageCounter; - static GlobalHandler *&getInstancePtr(); static SpinLock MSyclGlobalHandlerProtector; - // Constructor and destructor are declared out-of-line to allow incomplete - // types as template arguments to unique_ptr. - GlobalHandler(); - ~GlobalHandler(); - template struct InstWithLock { std::unique_ptr Inst; SpinLock Lock; @@ -135,7 +131,10 @@ class GlobalHandler { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES InstWithLock> MDeviceKernelInfoStorage; #endif + + static GlobalHandler *RTGlobalObjHandler; }; + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a2245349206da..24d0aef5754f1 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -66,10 +66,6 @@ static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog, } } -ProgramManager &ProgramManager::getInstance() { - return GlobalHandler::instance().getProgramManager(); -} - static Managed createBinaryProgram(context_impl &Context, devices_range Devices, const uint8_t **Binaries, size_t *Lengths, @@ -1805,14 +1801,6 @@ void ProgramManager::cacheKernelImplicitLocalArg( } } -std::optional -ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; -} - DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( const CompileTimeKernelInfoTy &Info) { std::lock_guard Guard(m_DeviceKernelInfoMapMutex); @@ -2344,24 +2332,6 @@ ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) { : sycl::bundle_state::object; } -std::optional -ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - - auto KernelID = m_KernelName2KernelIDs.find(KernelName); - if (KernelID == m_KernelName2KernelIDs.end()) - return std::nullopt; - - return KernelID->second; -} - -kernel_id ProgramManager::getSYCLKernelID(KernelNameStrRefT KernelName) { - if (std::optional MaybeKernelID = tryGetSYCLKernelID(KernelName)) - return *MaybeKernelID; - throw exception(make_error_code(errc::runtime), - "No kernel found with the specified name"); -} - bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) { std::lock_guard Guard(m_KernelIDsMutex); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index dc1c5c640333d..94c6114c9dbc2 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -134,7 +134,9 @@ class ProgramManager { public: // Returns the single instance of the program manager for the entire // process. Can only be called after staticInit is done. - static ProgramManager &getInstance(); + static ProgramManager &getInstance() { + return GlobalHandler::instance().getProgramManager(); + } const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, context_impl &ContextImpl, @@ -236,11 +238,24 @@ class ProgramManager { // The function returns the unique SYCL kernel identifier associated with a // kernel name or nullopt if there is no such ID. - std::optional tryGetSYCLKernelID(KernelNameStrRefT KernelName); + std::optional tryGetSYCLKernelID(KernelNameStrRefT KernelName) { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + + auto KernelID = m_KernelName2KernelIDs.find(KernelName); + if (KernelID == m_KernelName2KernelIDs.end()) + return std::nullopt; + + return KernelID->second; + } // The function returns the unique SYCL kernel identifier associated with a // kernel name or throws a sycl exception if there is no such ID. - kernel_id getSYCLKernelID(KernelNameStrRefT KernelName); + kernel_id getSYCLKernelID(KernelNameStrRefT KernelName) { + if (std::optional MaybeKernelID = tryGetSYCLKernelID(KernelName)) + return *MaybeKernelID; + throw exception(make_error_code(errc::runtime), + "No kernel found with the specified name"); + } // The function returns a vector containing all unique SYCL kernel identifiers // in SYCL device images. @@ -375,7 +390,12 @@ class ProgramManager { SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } std::optional - kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; + kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { + auto it = m_KernelImplicitLocalArgPos.find(KernelName); + if (it != m_KernelImplicitLocalArgPos.end()) + return it->second; + return {}; + } DeviceKernelInfo & getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); @@ -562,6 +582,7 @@ class ProgramManager { friend class ::ProgramManagerTest; }; + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9de9d63ecd188..e13e445c9fe59 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2279,23 +2279,6 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, ur_kernel_handle_t Kernel, } } -// We have the following mapping between dimensions with SPIR-V builtins: -// 1D: id[0] -> x -// 2D: id[0] -> y, id[1] -> x -// 3D: id[0] -> z, id[1] -> y, id[2] -> x -// So in order to ensure the correctness we update all the kernel -// parameters accordingly. -// Initially we keep the order of NDRDescT as it provided by the user, this -// simplifies overall handling and do the reverse only when -// the kernel is enqueued. -void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { - if (NDR.Dims > 1) { - std::swap(NDR.GlobalSize[0], NDR.GlobalSize[NDR.Dims - 1]); - std::swap(NDR.LocalSize[0], NDR.LocalSize[NDR.Dims - 1]); - std::swap(NDR.GlobalOffset[0], NDR.GlobalOffset[NDR.Dims - 1]); - } -} - ur_mem_flags_t AccessModeToUr(access::mode AccessorMode) { switch (AccessorMode) { case access::mode::read: diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index d47a5d9d9131f..1f3264c36a997 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -796,7 +796,22 @@ void applyFuncOnFilteredArgs( } } -void ReverseRangeDimensionsForKernel(NDRDescT &NDR); +// We have the following mapping between dimensions with SPIR-V builtins: +// 1D: id[0] -> x +// 2D: id[0] -> y, id[1] -> x +// 3D: id[0] -> z, id[1] -> y, id[2] -> x +// So in order to ensure the correctness we update all the kernel +// parameters accordingly. +// Initially we keep the order of NDRDescT as it provided by the user, this +// simplifies overall handling and do the reverse only when +// the kernel is enqueued. +inline void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { + if (NDR.Dims > 1) { + std::swap(NDR.GlobalSize[0], NDR.GlobalSize[NDR.Dims - 1]); + std::swap(NDR.LocalSize[0], NDR.LocalSize[NDR.Dims - 1]); + std::swap(NDR.GlobalOffset[0], NDR.GlobalOffset[NDR.Dims - 1]); + } +} } // namespace detail } // namespace _V1