Skip to content

Commit baa17a7

Browse files
committed
[SYCL] optimize enqueueImpKernel by inlining
1 parent f538636 commit baa17a7

File tree

10 files changed

+98
-141
lines changed

10 files changed

+98
-141
lines changed

sycl/source/detail/context_impl.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -263,22 +263,6 @@ context_impl::get_backend_info<info::device::backend_version>() const {
263263
}
264264
#endif
265265

266-
ur_context_handle_t &context_impl::getHandleRef() { return MContext; }
267-
const ur_context_handle_t &context_impl::getHandleRef() const {
268-
return MContext;
269-
}
270-
271-
KernelProgramCache &context_impl::getKernelProgramCache() const {
272-
return MKernelProgramCache;
273-
}
274-
275-
bool context_impl::hasDevice(const detail::device_impl &Device) const {
276-
for (device_impl *D : MDevices)
277-
if (D == &Device)
278-
return true;
279-
return false;
280-
}
281-
282266
device_impl *
283267
context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const {
284268
for (device_impl *D : MDevices)

sycl/source/detail/context_impl.hpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
116116
/// reference will be invalid if context_impl was destroyed.
117117
///
118118
/// \return an instance of raw UR context handle.
119-
ur_context_handle_t &getHandleRef();
119+
ur_context_handle_t &getHandleRef() { return MContext; }
120120

121121
/// Gets the underlying context object (if any) without reference count
122122
/// modification.
@@ -126,7 +126,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
126126
/// reference will be invalid if context_impl was destroyed.
127127
///
128128
/// \return an instance of raw UR context handle.
129-
const ur_context_handle_t &getHandleRef() const;
129+
const ur_context_handle_t &getHandleRef() const { return MContext; }
130130

131131
devices_range getDevices() const { return MDevices; }
132132

@@ -151,10 +151,15 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
151151
return {MCachedLibPrograms, MCachedLibProgramsMutex};
152152
}
153153

154-
KernelProgramCache &getKernelProgramCache() const;
154+
KernelProgramCache &getKernelProgramCache() const {
155+
return MKernelProgramCache;
156+
}
155157

156158
/// Returns true if and only if context contains the given device.
157-
bool hasDevice(const detail::device_impl &Device) const;
159+
bool hasDevice(const detail::device_impl &Device) const {
160+
return std::any_of(MDevices.begin(), MDevices.end(),
161+
[&](auto *D) { return D == &Device; });
162+
}
158163

159164
/// Returns true if and only if the device can be used within this context.
160165
/// For OpenCL this is currently equivalent to hasDevice, for other backends

sycl/source/detail/device_kernel_info.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -73,24 +73,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
7373
assert(Info == *this);
7474
}
7575

76-
FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
77-
assertInitialized();
78-
return MFastKernelSubcache;
79-
}
80-
81-
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
82-
assertInitialized();
83-
return MImplicitLocalArgPos;
84-
}
85-
86-
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }
87-
88-
void DeviceKernelInfo::assertInitialized() {
89-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
90-
assert(MInitialized.load() && "Data needs to be initialized before use");
91-
#endif
92-
}
93-
9476
} // namespace detail
9577
} // namespace _V1
9678
} // namespace sycl

sycl/source/detail/device_kernel_info.hpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,23 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
107107
#endif
108108
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);
109109

110-
FastKernelSubcacheT &getKernelSubcache();
111-
const std::optional<int> &getImplicitLocalArgPos();
110+
FastKernelSubcacheT &getKernelSubcache() {
111+
assertInitialized();
112+
return MFastKernelSubcache;
113+
}
114+
115+
std::optional<int> getImplicitLocalArgPos() const {
116+
assertInitialized();
117+
return MImplicitLocalArgPos;
118+
}
112119

113120
private:
114-
void assertInitialized();
115-
bool isCompileTimeInfoSet() const;
121+
void assertInitialized() const {
122+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
123+
assert(MInitialized.load() && "Data needs to be initialized before use");
124+
#endif
125+
}
126+
bool isCompileTimeInfoSet() const { return KernelSize != 0; }
116127

117128
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
118129
std::atomic<bool> MInitialized = false;

sycl/source/detail/global_handler.cpp

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
105105
#endif
106106
}
107107

108-
GlobalHandler *&GlobalHandler::getInstancePtr() {
109-
static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
110-
return RTGlobalObjHandler;
111-
}
112-
113-
GlobalHandler &GlobalHandler::instance() {
114-
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
115-
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
116-
return *RTGlobalObjHandler;
117-
}
118-
119-
bool GlobalHandler::isInstanceAlive() {
120-
return GlobalHandler::getInstancePtr();
121-
}
108+
GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler();
122109

123110
template <typename T, typename... Types>
124111
T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types &&...Args) {
@@ -331,8 +318,7 @@ void GlobalHandler::drainThreadPool() {
331318
// 2) when process is being terminated
332319
void shutdown_early(bool CanJoinThreads = true) {
333320
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
334-
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
335-
if (!Handler)
321+
if (!GlobalHandler::RTGlobalObjHandler)
336322
return;
337323

338324
#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
@@ -342,26 +328,26 @@ void shutdown_early(bool CanJoinThreads = true) {
342328
#endif
343329

344330
// Now that we are shutting down, we will no longer defer MemObj releases.
345-
Handler->endDeferredRelease();
331+
GlobalHandler::RTGlobalObjHandler->endDeferredRelease();
346332

347333
// Ensure neither host task is working so that no default context is accessed
348334
// upon its release
349-
Handler->prepareSchedulerToRelease(true);
335+
GlobalHandler::RTGlobalObjHandler->prepareSchedulerToRelease(true);
350336

351-
if (Handler->MHostTaskThreadPool.Inst) {
352-
Handler->MHostTaskThreadPool.Inst->finishAndWait(CanJoinThreads);
353-
Handler->MHostTaskThreadPool.Inst.reset(nullptr);
337+
if (GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst) {
338+
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst->finishAndWait(
339+
CanJoinThreads);
340+
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst.reset(nullptr);
354341
}
355342

356343
// This releases OUR reference to the default context, but
357344
// other may yet have refs
358-
Handler->releaseDefaultContexts();
345+
GlobalHandler::RTGlobalObjHandler->releaseDefaultContexts();
359346
}
360347

361348
void shutdown_late() {
362349
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
363-
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
364-
if (!Handler)
350+
if (!GlobalHandler::RTGlobalObjHandler)
365351
return;
366352

367353
#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
@@ -371,26 +357,27 @@ void shutdown_late() {
371357
#endif
372358

373359
// First, release resources, that may access adapters.
374-
Handler->MPlatformCache.Inst.reset(nullptr);
375-
Handler->MScheduler.Inst.reset(nullptr);
376-
Handler->MProgramManager.Inst.reset(nullptr);
360+
GlobalHandler::RTGlobalObjHandler->MPlatformCache.Inst.reset(nullptr);
361+
GlobalHandler::RTGlobalObjHandler->MScheduler.Inst.reset(nullptr);
362+
GlobalHandler::RTGlobalObjHandler->MProgramManager.Inst.reset(nullptr);
377363

378364
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
379365
// Kernel cache, which is part of device kernel info,
380366
// stores handles to the adapter, so clear it before releasing adapters.
381-
Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr);
367+
GlobalHandler::RTGlobalObjHandler->MDeviceKernelInfoStorage.Inst.reset(
368+
nullptr);
382369
#endif
383370

384371
// Clear the adapters and reset the instance if it was there.
385-
Handler->unloadAdapters();
386-
if (Handler->MAdapters.Inst)
387-
Handler->MAdapters.Inst.reset(nullptr);
372+
GlobalHandler::RTGlobalObjHandler->unloadAdapters();
373+
if (GlobalHandler::RTGlobalObjHandler->MAdapters.Inst)
374+
GlobalHandler::RTGlobalObjHandler->MAdapters.Inst.reset(nullptr);
388375

389-
Handler->MXPTIRegistry.Inst.reset(nullptr);
376+
GlobalHandler::RTGlobalObjHandler->MXPTIRegistry.Inst.reset(nullptr);
390377

391378
// Release the rest of global resources.
392-
delete Handler;
393-
Handler = nullptr;
379+
delete GlobalHandler::RTGlobalObjHandler;
380+
GlobalHandler::RTGlobalObjHandler = nullptr;
394381
}
395382

396383
#ifdef _WIN32

sycl/source/detail/global_handler.hpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,11 @@ class DeviceKernelInfo;
4848
/// construction or destruction is generated anyway.
4949
class GlobalHandler {
5050
public:
51-
/// \return a reference to a GlobalHandler singleton instance. Memory for
52-
/// storing objects is allocated on first call. The reference is valid as long
53-
/// as runtime library is loaded (i.e. untill `DllMain` or
51+
static bool isInstanceAlive() { return RTGlobalObjHandler != nullptr; }
52+
/// \return a reference to a GlobalHandler singleton instance. The reference
53+
/// is valid as long as runtime library is loaded (i.e. untill `DllMain` or
5454
/// `__attribute__((destructor))` is called).
55-
static GlobalHandler &instance();
56-
57-
/// \return true if the instance has not been deallocated yet.
58-
static bool isInstanceAlive();
55+
static GlobalHandler &instance() { return *RTGlobalObjHandler; }
5956

6057
GlobalHandler(const GlobalHandler &) = delete;
6158
GlobalHandler(GlobalHandler &&) = delete;
@@ -96,19 +93,18 @@ class GlobalHandler {
9693
void attachScheduler(Scheduler *Scheduler);
9794

9895
private:
96+
// Constructor and destructor are declared out-of-line to allow incomplete
97+
// types as template arguments to unique_ptr.
98+
GlobalHandler();
99+
~GlobalHandler();
100+
99101
bool OkToDefer = true;
100102

101103
friend void shutdown_early(bool);
102104
friend void shutdown_late();
103105
friend class ObjectUsageCounter;
104-
static GlobalHandler *&getInstancePtr();
105106
static SpinLock MSyclGlobalHandlerProtector;
106107

107-
// Constructor and destructor are declared out-of-line to allow incomplete
108-
// types as template arguments to unique_ptr.
109-
GlobalHandler();
110-
~GlobalHandler();
111-
112108
template <typename T> struct InstWithLock {
113109
std::unique_ptr<T> Inst;
114110
SpinLock Lock;
@@ -135,7 +131,10 @@ class GlobalHandler {
135131
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
136132
InstWithLock<std::deque<DeviceKernelInfo>> MDeviceKernelInfoStorage;
137133
#endif
134+
135+
static GlobalHandler *RTGlobalObjHandler;
138136
};
137+
139138
} // namespace detail
140139
} // namespace _V1
141140
} // namespace sycl

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,6 @@ static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog,
6666
}
6767
}
6868

69-
ProgramManager &ProgramManager::getInstance() {
70-
return GlobalHandler::instance().getProgramManager();
71-
}
72-
7369
static Managed<ur_program_handle_t>
7470
createBinaryProgram(context_impl &Context, devices_range Devices,
7571
const uint8_t **Binaries, size_t *Lengths,
@@ -1805,14 +1801,6 @@ void ProgramManager::cacheKernelImplicitLocalArg(
18051801
}
18061802
}
18071803

1808-
std::optional<int>
1809-
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
1810-
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
1811-
if (it != m_KernelImplicitLocalArgPos.end())
1812-
return it->second;
1813-
return {};
1814-
}
1815-
18161804
DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
18171805
const CompileTimeKernelInfoTy &Info) {
18181806
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
@@ -2344,24 +2332,6 @@ ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) {
23442332
: sycl::bundle_state::object;
23452333
}
23462334

2347-
std::optional<kernel_id>
2348-
ProgramManager::tryGetSYCLKernelID(KernelNameStrRefT KernelName) {
2349-
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
2350-
2351-
auto KernelID = m_KernelName2KernelIDs.find(KernelName);
2352-
if (KernelID == m_KernelName2KernelIDs.end())
2353-
return std::nullopt;
2354-
2355-
return KernelID->second;
2356-
}
2357-
2358-
kernel_id ProgramManager::getSYCLKernelID(KernelNameStrRefT KernelName) {
2359-
if (std::optional<kernel_id> MaybeKernelID = tryGetSYCLKernelID(KernelName))
2360-
return *MaybeKernelID;
2361-
throw exception(make_error_code(errc::runtime),
2362-
"No kernel found with the specified name");
2363-
}
2364-
23652335
bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) {
23662336
std::lock_guard<std::mutex> Guard(m_KernelIDsMutex);
23672337

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,9 @@ class ProgramManager {
134134
public:
135135
// Returns the single instance of the program manager for the entire
136136
// process. Can only be called after staticInit is done.
137-
static ProgramManager &getInstance();
137+
static ProgramManager &getInstance() {
138+
return GlobalHandler::instance().getProgramManager();
139+
}
138140

139141
const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName,
140142
context_impl &ContextImpl,
@@ -236,11 +238,24 @@ class ProgramManager {
236238

237239
// The function returns the unique SYCL kernel identifier associated with a
238240
// kernel name or nullopt if there is no such ID.
239-
std::optional<kernel_id> tryGetSYCLKernelID(KernelNameStrRefT KernelName);
241+
std::optional<kernel_id> tryGetSYCLKernelID(KernelNameStrRefT KernelName) {
242+
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
243+
244+
auto KernelID = m_KernelName2KernelIDs.find(KernelName);
245+
if (KernelID == m_KernelName2KernelIDs.end())
246+
return std::nullopt;
247+
248+
return KernelID->second;
249+
}
240250

241251
// The function returns the unique SYCL kernel identifier associated with a
242252
// kernel name or throws a sycl exception if there is no such ID.
243-
kernel_id getSYCLKernelID(KernelNameStrRefT KernelName);
253+
kernel_id getSYCLKernelID(KernelNameStrRefT KernelName) {
254+
if (std::optional<kernel_id> MaybeKernelID = tryGetSYCLKernelID(KernelName))
255+
return *MaybeKernelID;
256+
throw exception(make_error_code(errc::runtime),
257+
"No kernel found with the specified name");
258+
}
244259

245260
// The function returns a vector containing all unique SYCL kernel identifiers
246261
// in SYCL device images.
@@ -375,7 +390,12 @@ class ProgramManager {
375390
SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }
376391

377392
std::optional<int>
378-
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const;
393+
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
394+
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
395+
if (it != m_KernelImplicitLocalArgPos.end())
396+
return it->second;
397+
return {};
398+
}
379399

380400
DeviceKernelInfo &
381401
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
@@ -562,6 +582,7 @@ class ProgramManager {
562582

563583
friend class ::ProgramManagerTest;
564584
};
585+
565586
} // namespace detail
566587
} // namespace _V1
567588
} // namespace sycl

0 commit comments

Comments
 (0)