Skip to content

Commit 08e1487

Browse files
committed
[SYCL] optimize enqueueImpKernel by inlining
1 parent ceae49b commit 08e1487

File tree

10 files changed

+110
-142
lines changed

10 files changed

+110
-142
lines changed

sycl/source/detail/context_impl.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -263,22 +263,6 @@ context_impl::get_backend_info<info::device::backend_version>() const {
263263
}
264264
#endif
265265

266-
ur_context_handle_t &context_impl::getHandleRef() { return MContext; }
267-
const ur_context_handle_t &context_impl::getHandleRef() const {
268-
return MContext;
269-
}
270-
271-
KernelProgramCache &context_impl::getKernelProgramCache() const {
272-
return MKernelProgramCache;
273-
}
274-
275-
bool context_impl::hasDevice(const detail::device_impl &Device) const {
276-
for (device_impl *D : MDevices)
277-
if (D == &Device)
278-
return true;
279-
return false;
280-
}
281-
282266
device_impl *
283267
context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const {
284268
for (device_impl *D : MDevices)

sycl/source/detail/context_impl.hpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
116116
/// reference will be invalid if context_impl was destroyed.
117117
///
118118
/// \return an instance of raw UR context handle.
119-
ur_context_handle_t &getHandleRef();
119+
ur_context_handle_t &getHandleRef() { return MContext; }
120120

121121
/// Gets the underlying context object (if any) without reference count
122122
/// modification.
@@ -126,7 +126,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
126126
/// reference will be invalid if context_impl was destroyed.
127127
///
128128
/// \return an instance of raw UR context handle.
129-
const ur_context_handle_t &getHandleRef() const;
129+
const ur_context_handle_t &getHandleRef() const { return MContext; }
130130

131131
devices_range getDevices() const { return MDevices; }
132132

@@ -151,10 +151,17 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
151151
return {MCachedLibPrograms, MCachedLibProgramsMutex};
152152
}
153153

154-
KernelProgramCache &getKernelProgramCache() const;
154+
KernelProgramCache &getKernelProgramCache() const {
155+
return MKernelProgramCache;
156+
}
155157

156158
/// Returns true if and only if context contains the given device.
157-
bool hasDevice(const detail::device_impl &Device) const;
159+
bool hasDevice(const detail::device_impl &Device) const {
160+
for (device_impl *D : MDevices)
161+
if (D == &Device)
162+
return true;
163+
return false;
164+
}
158165

159166
/// Returns true if and only if the device can be used within this context.
160167
/// For OpenCL this is currently equivalent to hasDevice, for other backends

sycl/source/detail/device_kernel_info.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -74,27 +74,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
7474
assert(Info == *this);
7575
}
7676

77-
FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
78-
assertInitialized();
79-
return MFastKernelSubcache;
80-
}
81-
bool DeviceKernelInfo::usesAssert() {
82-
assertInitialized();
83-
return MUsesAssert;
84-
}
85-
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
86-
assertInitialized();
87-
return MImplicitLocalArgPos;
88-
}
89-
90-
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }
91-
92-
void DeviceKernelInfo::assertInitialized() {
93-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
94-
assert(MInitialized.load() && "Data needs to be initialized before use");
95-
#endif
96-
}
97-
9877
} // namespace detail
9978
} // namespace _V1
10079
} // namespace sycl

sycl/source/detail/device_kernel_info.hpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,28 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
107107
#endif
108108
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);
109109

110-
FastKernelSubcacheT &getKernelSubcache();
111-
bool usesAssert();
112-
const std::optional<int> &getImplicitLocalArgPos();
110+
FastKernelSubcacheT &getKernelSubcache() {
111+
assertInitialized();
112+
return MFastKernelSubcache;
113+
}
114+
115+
bool usesAssert() const {
116+
assertInitialized();
117+
return MUsesAssert;
118+
}
119+
120+
std::optional<int> getImplicitLocalArgPos() const {
121+
assertInitialized();
122+
return MImplicitLocalArgPos;
123+
}
113124

114125
private:
115-
void assertInitialized();
116-
bool isCompileTimeInfoSet() const;
126+
void assertInitialized() const {
127+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
128+
assert(MInitialized.load() && "Data needs to be initialized before use");
129+
#endif
130+
}
131+
bool isCompileTimeInfoSet() const { return KernelSize != 0; }
117132

118133
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
119134
std::atomic<bool> MInitialized = false;

sycl/source/detail/global_handler.cpp

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
105105
#endif
106106
}
107107

108-
GlobalHandler *&GlobalHandler::getInstancePtr() {
109-
static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
110-
return RTGlobalObjHandler;
111-
}
112-
113-
GlobalHandler &GlobalHandler::instance() {
114-
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
115-
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
116-
return *RTGlobalObjHandler;
117-
}
118-
119-
bool GlobalHandler::isInstanceAlive() {
120-
return GlobalHandler::getInstancePtr();
121-
}
108+
GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler();
122109

123110
template <typename T, typename... Types>
124111
T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types &&...Args) {
@@ -331,8 +318,7 @@ void GlobalHandler::drainThreadPool() {
331318
// 2) when process is being terminated
332319
void shutdown_early(bool CanJoinThreads = true) {
333320
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
334-
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
335-
if (!Handler)
321+
if (!GlobalHandler::RTGlobalObjHandler)
336322
return;
337323

338324
#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
@@ -342,26 +328,26 @@ void shutdown_early(bool CanJoinThreads = true) {
342328
#endif
343329

344330
// Now that we are shutting down, we will no longer defer MemObj releases.
345-
Handler->endDeferredRelease();
331+
GlobalHandler::RTGlobalObjHandler->endDeferredRelease();
346332

347333
// Ensure neither host task is working so that no default context is accessed
348334
// upon its release
349-
Handler->prepareSchedulerToRelease(true);
335+
GlobalHandler::RTGlobalObjHandler->prepareSchedulerToRelease(true);
350336

351-
if (Handler->MHostTaskThreadPool.Inst) {
352-
Handler->MHostTaskThreadPool.Inst->finishAndWait(CanJoinThreads);
353-
Handler->MHostTaskThreadPool.Inst.reset(nullptr);
337+
if (GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst) {
338+
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst->finishAndWait(
339+
CanJoinThreads);
340+
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst.reset(nullptr);
354341
}
355342

356343
// This releases OUR reference to the default context, but
357344
// other may yet have refs
358-
Handler->releaseDefaultContexts();
345+
GlobalHandler::RTGlobalObjHandler->releaseDefaultContexts();
359346
}
360347

361348
void shutdown_late() {
362349
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
363-
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
364-
if (!Handler)
350+
if (!GlobalHandler::RTGlobalObjHandler)
365351
return;
366352

367353
#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
@@ -371,26 +357,27 @@ void shutdown_late() {
371357
#endif
372358

373359
// First, release resources, that may access adapters.
374-
Handler->MPlatformCache.Inst.reset(nullptr);
375-
Handler->MScheduler.Inst.reset(nullptr);
376-
Handler->MProgramManager.Inst.reset(nullptr);
360+
GlobalHandler::RTGlobalObjHandler->MPlatformCache.Inst.reset(nullptr);
361+
GlobalHandler::RTGlobalObjHandler->MScheduler.Inst.reset(nullptr);
362+
GlobalHandler::RTGlobalObjHandler->MProgramManager.Inst.reset(nullptr);
377363

378364
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
379365
// Kernel cache, which is part of device kernel info,
380366
// stores handles to the adapter, so clear it before releasing adapters.
381-
Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr);
367+
GlobalHandler::RTGlobalObjHandler->MDeviceKernelInfoStorage.Inst.reset(
368+
nullptr);
382369
#endif
383370

384371
// Clear the adapters and reset the instance if it was there.
385-
Handler->unloadAdapters();
386-
if (Handler->MAdapters.Inst)
387-
Handler->MAdapters.Inst.reset(nullptr);
372+
GlobalHandler::RTGlobalObjHandler->unloadAdapters();
373+
if (GlobalHandler::RTGlobalObjHandler->MAdapters.Inst)
374+
GlobalHandler::RTGlobalObjHandler->MAdapters.Inst.reset(nullptr);
388375

389-
Handler->MXPTIRegistry.Inst.reset(nullptr);
376+
GlobalHandler::RTGlobalObjHandler->MXPTIRegistry.Inst.reset(nullptr);
390377

391378
// Release the rest of global resources.
392-
delete Handler;
393-
Handler = nullptr;
379+
delete GlobalHandler::RTGlobalObjHandler;
380+
GlobalHandler::RTGlobalObjHandler = nullptr;
394381
}
395382

396383
#ifdef _WIN32

sycl/source/detail/global_handler.hpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,11 @@ class DeviceKernelInfo;
4848
/// construction or destruction is generated anyway.
4949
class GlobalHandler {
5050
public:
51-
/// \return a reference to a GlobalHandler singleton instance. Memory for
52-
/// storing objects is allocated on first call. The reference is valid as long
53-
/// as runtime library is loaded (i.e. untill `DllMain` or
51+
static bool isInstanceAlive() { return RTGlobalObjHandler != nullptr; }
52+
/// \return a reference to a GlobalHandler singleton instance. The reference
53+
/// is valid as long as runtime library is loaded (i.e. untill `DllMain` or
5454
/// `__attribute__((destructor))` is called).
55-
static GlobalHandler &instance();
56-
57-
/// \return true if the instance has not been deallocated yet.
58-
static bool isInstanceAlive();
55+
static GlobalHandler &instance() { return *RTGlobalObjHandler; }
5956

6057
GlobalHandler(const GlobalHandler &) = delete;
6158
GlobalHandler(GlobalHandler &&) = delete;
@@ -96,19 +93,18 @@ class GlobalHandler {
9693
void attachScheduler(Scheduler *Scheduler);
9794

9895
private:
96+
// Constructor and destructor are declared out-of-line to allow incomplete
97+
// types as template arguments to unique_ptr.
98+
GlobalHandler();
99+
~GlobalHandler();
100+
99101
bool OkToDefer = true;
100102

101103
friend void shutdown_early(bool);
102104
friend void shutdown_late();
103105
friend class ObjectUsageCounter;
104-
static GlobalHandler *&getInstancePtr();
105106
static SpinLock MSyclGlobalHandlerProtector;
106107

107-
// Constructor and destructor are declared out-of-line to allow incomplete
108-
// types as template arguments to unique_ptr.
109-
GlobalHandler();
110-
~GlobalHandler();
111-
112108
template <typename T> struct InstWithLock {
113109
std::unique_ptr<T> Inst;
114110
SpinLock Lock;
@@ -135,7 +131,10 @@ class GlobalHandler {
135131
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
136132
InstWithLock<std::deque<DeviceKernelInfo>> MDeviceKernelInfoStorage;
137133
#endif
134+
135+
static GlobalHandler *RTGlobalObjHandler;
138136
};
137+
139138
} // namespace detail
140139
} // namespace _V1
141140
} // namespace sycl

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,6 @@ static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog,
6666
}
6767
}
6868

69-
ProgramManager &ProgramManager::getInstance() {
70-
return GlobalHandler::instance().getProgramManager();
71-
}
72-
7369
static Managed<ur_program_handle_t>
7470
createBinaryProgram(context_impl &Context, devices_range Devices,
7571
const uint8_t **Binaries, size_t *Lengths,
@@ -1813,14 +1809,6 @@ void ProgramManager::cacheKernelImplicitLocalArg(
18131809
}
18141810
}
18151811

1816-
std::optional<int>
1817-
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
1818-
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
1819-
if (it != m_KernelImplicitLocalArgPos.end())
1820-
return it->second;
1821-
return {};
1822-
}
1823-
18241812
DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
18251813
const CompileTimeKernelInfoTy &Info) {
18261814
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
@@ -2355,24 +2343,6 @@ ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) {
23552343
: sycl::bundle_state::object;
23562344
}
23572345

2358-
std::optional<kernel_id>
2359-
ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) {
2360-
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2361-
2362-
auto KernelID = m_KernelName2KernelIDs.find(KernelName);
2363-
if (KernelID == m_KernelName2KernelIDs.end())
2364-
return std::nullopt;
2365-
2366-
return KernelID->second;
2367-
}
2368-
2369-
kernel_id ProgramManager::getSYCLKernelID(KernelNameStrRefT KernelName) {
2370-
if (std::optional<kernel_id> MaybeKernelID = tryGetSYCLKernelID(KernelName))
2371-
return *MaybeKernelID;
2372-
throw exception(make_error_code(errc::runtime),
2373-
"No kernel found with the specified name");
2374-
}
2375-
23762346
bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) {
23772347
std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
23782348

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,9 @@ class ProgramManager {
133133
public:
134134
// Returns the single instance of the program manager for the entire
135135
// process. Can only be called after staticInit is done.
136-
static ProgramManager &getInstance();
136+
static ProgramManager &getInstance() {
137+
return GlobalHandler::instance().getProgramManager();
138+
}
137139

138140
const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName,
139141
context_impl &ContextImpl,
@@ -571,6 +573,33 @@ class ProgramManager {
571573

572574
friend class ::ProgramManagerTest;
573575
};
576+
577+
inline std::optional<int>
578+
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
579+
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
580+
if (it != m_KernelImplicitLocalArgPos.end())
581+
return it->second;
582+
return {};
583+
}
584+
585+
inline std::optional<kernel_id>
586+
ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) {
587+
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
588+
589+
auto KernelID = m_KernelName2KernelIDs.find(KernelName);
590+
if (KernelID == m_KernelName2KernelIDs.end())
591+
return std::nullopt;
592+
593+
return KernelID->second;
594+
}
595+
596+
inline kernel_id ProgramManager::getSYCLKernelID(KernelNameStrRefT KernelName) {
597+
if (std::optional<kernel_id> MaybeKernelID = tryGetSYCLKernelID(KernelName))
598+
return *MaybeKernelID;
599+
throw exception(make_error_code(errc::runtime),
600+
"No kernel found with the specified name");
601+
}
602+
574603
} // namespace detail
575604
} // namespace _V1
576605
} // namespace sycl

0 commit comments

Comments
 (0)