Skip to content

Commit 05db68c

Browse files
[NFC][SYCL] Add events_range helper (#19608)
Makes code a bit cleaner and also clearly communicates intent of the interfaces being changed that they don't really need `shared_ptr` nor perform life time manipulations. Also avoids unnecessary copies for those APIs that forgot to pass `std::vector` by reference (see `memory_manager.hpp`).
1 parent ea4d68c commit 05db68c

File tree

9 files changed

+96
-111
lines changed

9 files changed

+96
-111
lines changed

sycl/source/detail/async_alloc.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@ inline namespace _V1 {
2222
namespace ext::oneapi::experimental {
2323

2424
namespace {
25-
std::vector<ur_event_handle_t>
26-
getUrEvents(const std::vector<std::shared_ptr<detail::event_impl>> &DepEvents) {
25+
std::vector<ur_event_handle_t> getUrEvents(detail::events_range DepEvents) {
2726
std::vector<ur_event_handle_t> RetUrEvents;
28-
for (const std::shared_ptr<detail::event_impl> &EventImpl : DepEvents) {
29-
ur_event_handle_t Handle = EventImpl->getHandle();
27+
for (detail::event_impl &Event : DepEvents) {
28+
ur_event_handle_t Handle = Event.getHandle();
3029
if (Handle != nullptr)
3130
RetUrEvents.push_back(Handle);
3231
}

sycl/source/detail/event_impl.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
#pragma once
1010

1111
#include <detail/adapter_impl.hpp>
12+
#include <detail/helpers.hpp>
1213
#include <sycl/detail/cl.h>
1314
#include <sycl/detail/common.hpp>
1415
#include <sycl/detail/host_profiling_info.hpp>
1516
#include <sycl/detail/ur.hpp>
17+
#include <sycl/event.hpp>
1618
#include <sycl/info/info_desc.hpp>
1719

1820
#include <atomic>
@@ -458,6 +460,19 @@ class event_impl {
458460
bool MIsHostEvent = false;
459461
};
460462

463+
using events_iterator =
464+
variadic_iterator<event,
465+
std::vector<std::shared_ptr<event_impl>>::const_iterator,
466+
std::vector<event>::const_iterator,
467+
std::vector<event_impl *>::const_iterator, event_impl *>;
468+
469+
class events_range : public iterator_range<events_iterator> {
470+
private:
471+
using Base = iterator_range<events_iterator>;
472+
473+
public:
474+
using Base::Base;
475+
};
461476
} // namespace detail
462477
} // namespace _V1
463478
} // namespace sycl

sycl/source/detail/memory_manager.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,15 +118,19 @@ void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
118118
#endif
119119
}
120120

121-
static void waitForEvents(const std::vector<EventImplPtr> &Events) {
121+
static void waitForEvents(events_range Events) {
122122
// Assuming all events will be on the same device or
123123
// devices associated with the same Backend.
124124
if (!Events.empty()) {
125-
adapter_impl &Adapter = Events[0]->getAdapter();
125+
adapter_impl &Adapter = Events.front().getAdapter();
126126
std::vector<ur_event_handle_t> UrEvents(Events.size());
127-
std::transform(
128-
Events.begin(), Events.end(), UrEvents.begin(),
129-
[](const EventImplPtr &EventImpl) { return EventImpl->getHandle(); });
127+
std::transform(Events.begin(), Events.end(), UrEvents.begin(),
128+
[](event_impl &Event) { return Event.getHandle(); });
129+
// TODO: Why this condition??? Added during PI Removal in
130+
// https://github.com/intel/llvm/pull/14145 with no explanation.
131+
// Should we just filter out all `nullptr`, not only the one in the first
132+
// element?
133+
assert(!UrEvents.empty() && UrEvents[0]);
130134
if (!UrEvents.empty() && UrEvents[0]) {
131135
Adapter.call<UrApiKind::urEventWait>(UrEvents.size(), &UrEvents[0]);
132136
}
@@ -251,8 +255,7 @@ void memUnmapHelper(adapter_impl &Adapter, ur_queue_handle_t Queue,
251255
}
252256

253257
void MemoryManager::release(context_impl *TargetContext, SYCLMemObjI *MemObj,
254-
void *MemAllocation,
255-
std::vector<EventImplPtr> DepEvents,
258+
void *MemAllocation, events_range DepEvents,
256259
ur_event_handle_t &OutEvent) {
257260
// There is no async API for memory releasing. Explicitly wait for all
258261
// dependency events and return empty event.
@@ -281,7 +284,7 @@ void MemoryManager::releaseMemObj(context_impl *TargetContext,
281284

282285
void *MemoryManager::allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
283286
bool InitFromUserData, void *HostPtr,
284-
std::vector<EventImplPtr> DepEvents,
287+
events_range DepEvents,
285288
ur_event_handle_t &OutEvent) {
286289
// There is no async API for memory allocation. Explicitly wait for all
287290
// dependency events and return empty event.
@@ -432,7 +435,7 @@ void *MemoryManager::allocateMemImage(
432435
void *MemoryManager::allocateMemSubBuffer(context_impl *TargetContext,
433436
void *ParentMemObj, size_t ElemSize,
434437
size_t Offset, range<3> Range,
435-
std::vector<EventImplPtr> DepEvents,
438+
events_range DepEvents,
436439
ur_event_handle_t &OutEvent) {
437440
waitForEvents(DepEvents);
438441
OutEvent = nullptr;

sycl/source/detail/memory_manager.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ namespace detail {
2626

2727
class queue_impl;
2828
class event_impl;
29+
class events_range;
2930
class context_impl;
3031

3132
using EventImplPtr = std::shared_ptr<detail::event_impl>;
@@ -38,22 +39,21 @@ class MemoryManager {
3839
// The following method releases memory allocation of memory object.
3940
// Depending on the context it releases memory on host or on device.
4041
static void release(context_impl *TargetContext, SYCLMemObjI *MemObj,
41-
void *MemAllocation, std::vector<EventImplPtr> DepEvents,
42+
void *MemAllocation, events_range DepEvents,
4243
ur_event_handle_t &OutEvent);
4344

4445
// The following method allocates memory allocation of memory object.
4546
// Depending on the context it allocates memory on host or on device.
4647
static void *allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
4748
bool InitFromUserData, void *HostPtr,
48-
std::vector<EventImplPtr> DepEvents,
49-
ur_event_handle_t &OutEvent);
49+
events_range DepEvents, ur_event_handle_t &OutEvent);
5050

5151
// The following method creates OpenCL sub buffer for specified
5252
// offset, range, and memory object.
5353
static void *allocateMemSubBuffer(context_impl *TargetContext,
5454
void *ParentMemObj, size_t ElemSize,
5555
size_t Offset, range<3> Range,
56-
std::vector<EventImplPtr> DepEvents,
56+
events_range DepEvents,
5757
ur_event_handle_t &OutEvent);
5858

5959
// Allocates buffer in specified context taking into account situations such

sycl/source/detail/queue_impl.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -726,8 +726,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
726726
return false;
727727

728728
if (MDefaultGraphDeps.LastEventPtr != nullptr &&
729-
!Scheduler::CheckEventReadiness(*MContext,
730-
MDefaultGraphDeps.LastEventPtr))
729+
!Scheduler::areEventsSafeForSchedulerBypass(
730+
{*MDefaultGraphDeps.LastEventPtr}, *MContext))
731731
return false;
732732

733733
MNoLastEventMode.store(true, std::memory_order_relaxed);
@@ -746,7 +746,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
746746

747747
auto Event = parseEvent(Handler.finalize());
748748

749-
if (Event && !Scheduler::CheckEventReadiness(*MContext, Event)) {
749+
if (Event &&
750+
!Scheduler::areEventsSafeForSchedulerBypass({*Event}, *MContext)) {
750751
MDefaultGraphDeps.LastEventPtr = Event;
751752
MNoLastEventMode.store(false, std::memory_order_relaxed);
752753
}

sycl/source/detail/scheduler/commands.cpp

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -231,20 +231,20 @@ static std::string commandToName(Command::CommandType Type) {
231231
}
232232
#endif
233233

234-
std::vector<ur_event_handle_t>
235-
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
236-
queue_impl *CommandQueue, bool IsHostTaskCommand) {
234+
std::vector<ur_event_handle_t> Command::getUrEvents(events_range Events,
235+
queue_impl *CommandQueue,
236+
bool IsHostTaskCommand) {
237237
std::vector<ur_event_handle_t> RetUrEvents;
238-
for (auto &EventImpl : EventImpls) {
239-
auto Handle = EventImpl->getHandle();
238+
for (event_impl &Event : Events) {
239+
auto Handle = Event.getHandle();
240240
if (Handle == nullptr)
241241
continue;
242242

243243
// Do not add redundant event dependencies for in-order queues.
244244
// At this stage dependency is definitely ur task and need to check if
245245
// current one is a host task. In this case we should not skip ur event due
246246
// to different sync mechanisms for different task types on in-order queue.
247-
if (CommandQueue && EventImpl->getWorkerQueue().get() == CommandQueue &&
247+
if (CommandQueue && Event.getWorkerQueue().get() == CommandQueue &&
248248
CommandQueue->isInOrder() && !IsHostTaskCommand)
249249
continue;
250250

@@ -254,39 +254,34 @@ Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
254254
return RetUrEvents;
255255
}
256256

257-
std::vector<ur_event_handle_t>
258-
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
259-
return getUrEvents(EventImpls, MWorkerQueue.get(), isHostTask());
257+
std::vector<ur_event_handle_t> Command::getUrEvents(events_range Events) const {
258+
return getUrEvents(Events, MWorkerQueue.get(), isHostTask());
260259
}
261260

262261
// This function is implemented (duplicating getUrEvents a lot) as short term
263262
// solution for the issue that barrier with wait list could not
264263
// handle empty ur event handles when kernel is enqueued on host task
265264
// completion.
266265
std::vector<ur_event_handle_t>
267-
Command::getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
268-
bool HasEventMode) const {
266+
Command::getUrEventsBlocking(events_range Events, bool HasEventMode) const {
269267
std::vector<ur_event_handle_t> RetUrEvents;
270-
for (auto &EventImpl : EventImpls) {
268+
for (event_impl &Event : Events) {
271269
// Throwaway events created with empty constructor will not have a context
272270
// (which is set lazily) calling getContextImpl() would set that
273271
// context, which we wish to avoid as it is expensive.
274272
// Skip host task and NOP events also.
275-
if (EventImpl->isDefaultConstructed() || EventImpl->isHost() ||
276-
EventImpl->isNOP())
273+
if (Event.isDefaultConstructed() || Event.isHost() || Event.isNOP())
277274
continue;
278275

279276
// If command has not been enqueued then we have to enqueue it.
280277
// It may happen if async enqueue in a host task is involved.
281278
// Interoperability events are special cases and they are not enqueued, as
282279
// they don't have an associated queue and command.
283-
if (!EventImpl->isInterop() && !EventImpl->isEnqueued()) {
284-
if (!EventImpl->getCommand() ||
285-
!EventImpl->getCommand()->producesPiEvent())
280+
if (!Event.isInterop() && !Event.isEnqueued()) {
281+
if (!Event.getCommand() || !Event.getCommand()->producesPiEvent())
286282
continue;
287283
std::vector<Command *> AuxCmds;
288-
Scheduler::getInstance().enqueueCommandForCG(*EventImpl, AuxCmds,
289-
BLOCKING);
284+
Scheduler::getInstance().enqueueCommandForCG(Event, AuxCmds, BLOCKING);
290285
}
291286
// Do not add redundant event dependencies for in-order queues.
292287
// At this stage dependency is definitely ur task and need to check if
@@ -296,11 +291,11 @@ Command::getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
296291
// redundant events may still differ from the resulting event, so they are
297292
// kept.
298293
if (!HasEventMode && MWorkerQueue &&
299-
EventImpl->getWorkerQueue() == MWorkerQueue &&
300-
MWorkerQueue->isInOrder() && !isHostTask())
294+
Event.getWorkerQueue() == MWorkerQueue && MWorkerQueue->isInOrder() &&
295+
!isHostTask())
301296
continue;
302297

303-
RetUrEvents.push_back(EventImpl->getHandle());
298+
RetUrEvents.push_back(Event.getHandle());
304299
}
305300

306301
return RetUrEvents;

sycl/source/detail/scheduler/commands.hpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -236,21 +236,19 @@ class Command {
236236
/// Returns true iff this command is ready to be submitted for cleanup.
237237
virtual bool readyForCleanup() const;
238238

239-
/// Collect UR events from EventImpls and filter out some of them in case of
240-
/// in order queue
241-
std::vector<ur_event_handle_t>
242-
getUrEvents(const std::vector<EventImplPtr> &EventImpls) const;
239+
/// Collect UR events from Events and filter out some of them in case of
240+
/// in order queue.
241+
std::vector<ur_event_handle_t> getUrEvents(events_range Events) const;
243242

244-
static std::vector<ur_event_handle_t>
245-
getUrEvents(const std::vector<EventImplPtr> &EventImpls,
246-
queue_impl *CommandQueue, bool IsHostTaskCommand);
243+
static std::vector<ur_event_handle_t> getUrEvents(events_range Events,
244+
queue_impl *CommandQueue,
245+
bool IsHostTaskCommand);
247246

248247
/// Collect UR events from EventImpls and filter out some of them in case of
249248
/// in order queue. Does blocking enqueue if event is expected to produce ur
250249
/// event but has empty native handle.
251-
std::vector<ur_event_handle_t>
252-
getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
253-
bool HasEventMode) const;
250+
std::vector<ur_event_handle_t> getUrEventsBlocking(events_range Events,
251+
bool HasEventMode) const;
254252

255253
bool isHostTask() const;
256254

@@ -275,9 +273,9 @@ class Command {
275273

276274
void waitForPreparedHostEvents() const;
277275

278-
void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls) {
279-
for (auto &EventImpl : EventImpls) {
280-
EventImpl->flushIfNeeded(MWorkerQueue.get());
276+
void flushCrossQueueDeps(events_range Events) {
277+
for (event_impl &Event : Events) {
278+
Event.flushIfNeeded(MWorkerQueue.get());
281279
}
282280
}
283281

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 25 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -411,11 +411,11 @@ void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req,
411411
EnqueueLeaves(Record->MWriteLeaves);
412412
}
413413

414-
void Scheduler::enqueueUnblockedCommands(
415-
const std::vector<EventImplPtr> &ToEnqueue, ReadLockT &GraphReadLock,
416-
std::vector<Command *> &ToCleanUp) {
417-
for (auto &Event : ToEnqueue) {
418-
Command *Cmd = Event->getCommand();
414+
void Scheduler::enqueueUnblockedCommands(events_range ToEnqueue,
415+
ReadLockT &GraphReadLock,
416+
std::vector<Command *> &ToCleanUp) {
417+
for (event_impl &Event : ToEnqueue) {
418+
Command *Cmd = Event.getCommand();
419419
if (!Cmd)
420420
continue;
421421
EnqueueResultT Res;
@@ -684,46 +684,28 @@ EventImplPtr Scheduler::addCommandGraphUpdate(
684684
return NewCmdEvent;
685685
}
686686

687-
bool Scheduler::CheckEventReadiness(context_impl &Context,
688-
const EventImplPtr &SyclEventImplPtr) {
689-
// Events that don't have an initialized context are throwaway events that
690-
// don't represent actual dependencies. Calling getContextImpl() would set
691-
// their context, which we wish to avoid as it is expensive.
692-
// NOP events also don't represent actual dependencies.
693-
if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) {
694-
return true;
695-
}
696-
if (SyclEventImplPtr->isHost()) {
697-
return SyclEventImplPtr->isCompleted();
698-
}
699-
// Cross-context dependencies can't be passed to the backend directly.
700-
if (&SyclEventImplPtr->getContextImpl() != &Context)
701-
return false;
702-
703-
// A nullptr here means that the commmand does not produce a UR event or it
704-
// hasn't been enqueued yet.
705-
return SyclEventImplPtr->getHandle() != nullptr;
706-
}
707-
708-
bool Scheduler::areEventsSafeForSchedulerBypass(
709-
const std::vector<sycl::event> &DepEvents, context_impl &Context) {
710-
711-
return std::all_of(
712-
DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) {
713-
const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
714-
return CheckEventReadiness(Context, SyclEventImplPtr);
715-
});
716-
}
717-
718-
bool Scheduler::areEventsSafeForSchedulerBypass(
719-
const std::vector<EventImplPtr> &DepEvents, context_impl &Context) {
687+
bool Scheduler::areEventsSafeForSchedulerBypass(events_range DepEvents,
688+
context_impl &Context) {
689+
return all_of(DepEvents, [&Context](sycl::detail::event_impl &Event) {
690+
// Events that don't have an initialized context are throwaway events that
691+
// don't represent actual dependencies. Calling getContextImpl() would set
692+
// their context, which we wish to avoid as it is expensive.
693+
// NOP events also don't represent actual dependencies.
694+
if (Event.isDefaultConstructed() || Event.isNOP())
695+
return true;
696+
697+
if (Event.isHost())
698+
return Event.isCompleted();
699+
700+
// Cross-context dependencies can't be passed to the backend directly.
701+
if (&Event.getContextImpl() != &Context)
702+
return false;
720703

721-
return std::all_of(DepEvents.begin(), DepEvents.end(),
722-
[&Context](const EventImplPtr &SyclEventImplPtr) {
723-
return CheckEventReadiness(Context, SyclEventImplPtr);
724-
});
704+
// A nullptr here means that the commmand does not produce a UR event or it
705+
// hasn't been enqueued yet.
706+
return Event.getHandle() != nullptr;
707+
});
725708
}
726-
727709
} // namespace detail
728710
} // namespace _V1
729711
} // namespace sycl

0 commit comments

Comments
 (0)