Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,13 @@ event queue_impl::memcpyFromDeviceGlobal(
}

sycl::detail::optional<event> queue_impl::getLastEvent() {
{
// The external event is required to finish last if set, so it is considered
// the last event if present.
std::lock_guard<std::mutex> Lock(MInOrderExternalEventMtx);
if (MInOrderExternalEvent)
return *MInOrderExternalEvent;
}
// The external event is required to finish last if set, so it is considered
// the last event if present.
if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read(
[](std::optional<event> &InOrderExternalEvent) {
return InOrderExternalEvent;
}))
return ExternalEvent;

std::lock_guard<std::mutex> Lock{MMutex};
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
Expand Down Expand Up @@ -618,12 +618,12 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
WeakEvents.swap(MEventsWeak);
SharedEvents.swap(MEventsShared);

{
std::lock_guard<std::mutex> RequestLock(MMissedCleanupRequestsMtx);
for (auto &UpdatedGraph : MMissedCleanupRequests)
doUnenqueuedCommandCleanup(UpdatedGraph);
MMissedCleanupRequests.clear();
}
MMissedCleanupRequests.get(
[this](MissedCleanupRequestsType &MissedCleanupRequests) {
for (auto &UpdatedGraph : MissedCleanupRequests)
doUnenqueuedCommandCleanup(UpdatedGraph);
MissedCleanupRequests.clear();
});
}
// If the queue is either a host one or does not support OOO (and we use
// multiple in-order queues as a result of that), wait for each event
Expand Down Expand Up @@ -799,8 +799,10 @@ void queue_impl::revisitUnenqueuedCommandsState(
if (Lock.owns_lock())
doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph());
else {
std::lock_guard<std::mutex> RequestLock(MMissedCleanupRequestsMtx);
MMissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph());
MMissedCleanupRequests.put(
[CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests) {
MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph());
});
}
}

Expand Down
69 changes: 51 additions & 18 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -710,14 +710,19 @@ class queue_impl {
void *getTraceEvent() { return MTraceEvent; }

void setExternalEvent(const event &Event) {
std::lock_guard<std::mutex> Lock(MInOrderExternalEventMtx);
MInOrderExternalEvent = Event;
MInOrderExternalEvent.put(
[&Event](std::optional<event> &InOrderExternalEvent) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't mind [&](auto &InOrderExternalEvent) { ... } here and similarly elsewhere, but won't insist.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

InOrderExternalEvent = Event;
});
}

std::optional<event> popExternalEvent() {
std::lock_guard<std::mutex> Lock(MInOrderExternalEventMtx);
std::optional<event> Result = std::nullopt;
std::swap(Result, MInOrderExternalEvent);

MInOrderExternalEvent.get(
[&Result](std::optional<event> &InOrderExternalEvent) {
std::swap(Result, InOrderExternalEvent);
});
return Result;
}

Expand Down Expand Up @@ -833,12 +838,12 @@ class queue_impl {
// dependency so in the case where some commands were not enqueued
// (blocked), we track them to prevent barrier from being enqueued
// earlier.
{
std::lock_guard<std::mutex> RequestLock(MMissedCleanupRequestsMtx);
for (auto &UpdatedGraph : MMissedCleanupRequests)
doUnenqueuedCommandCleanup(UpdatedGraph);
MMissedCleanupRequests.clear();
}
MMissedCleanupRequests.get(
[this](MissedCleanupRequestsType &MissedCleanupRequests) {
for (auto &UpdatedGraph : MissedCleanupRequests)
doUnenqueuedCommandCleanup(UpdatedGraph);
MissedCleanupRequests.clear();
});
auto &Deps = MGraph.expired() ? MDefaultGraphDeps : MExtGraphDeps;
if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents.empty()) {
Handler.depends_on(Deps.UnenqueuedCmdEvents);
Expand All @@ -850,12 +855,12 @@ class queue_impl {
auto EventRet = Handler.finalize();
EventImplPtr EventRetImpl = getSyclObjImpl(EventRet);
if (Type == CGType::CodeplayHostTask)
Deps.UnenqueuedCmdEvents.push_back(EventRetImpl);
Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getSyclObjImpl(EventRet) vs std::move(EventRetImpl) is almost equal, I don't think the temporary variable is justified anymore. IMO, inline it and maybe add a comment for the future as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's turn out this was already modified on sycl branch (reference used instead). So, dropped from the patch.

else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) {
Deps.LastBarrier = EventRetImpl;
Deps.LastBarrier = std::move(EventRetImpl);
Deps.UnenqueuedCmdEvents.clear();
} else if (!EventRetImpl->isEnqueued()) {
Deps.UnenqueuedCmdEvents.push_back(EventRetImpl);
Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl));
}

return EventRet;
Expand Down Expand Up @@ -1022,6 +1027,35 @@ class queue_impl {
}
} MDefaultGraphDeps, MExtGraphDeps;

// implement check-lock-check pattern to not lock empty MData
template <typename DataType> class CheckLockCheck {
DataType MData;
std::atomic_bool MDataPresent = false;
mutable std::mutex MDataMtx;

public:
template <typename F> void put(F &&func) {
std::lock_guard<std::mutex> Lock(MDataMtx);
MDataPresent.store(true, std::memory_order_release);
func(MData);
}
template <typename F> void get(F &&func) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think modifications on get might be unexpected. Would pop work as a name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Then, it might be reasonable to change put to pop.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean push?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, typo. Yes, push/pop instead of put/get.

if (MDataPresent.load(std::memory_order_acquire)) {
std::lock_guard<std::mutex> Lock(MDataMtx);
if (MDataPresent.load(std::memory_order_acquire)) {
func(MData);
MDataPresent.store(false, std::memory_order_release);
}
}
}
template <typename F> DataType read(F &&func) {
if (!MDataPresent.load(std::memory_order_acquire))
return DataType{};
std::lock_guard<std::mutex> Lock(MDataMtx);
return func(MData);
}
};

const bool MIsInorder;

std::vector<EventImplPtr> MStreamsServiceEvents;
Expand All @@ -1044,8 +1078,7 @@ class queue_impl {
// an additional dependency for the subsequent submission in to the queue.
// Access to the event should be guarded with MInOrderExternalEventMtx.
// NOTE: std::optional must not be exposed in the ABI.
std::optional<event> MInOrderExternalEvent;
mutable std::mutex MInOrderExternalEventMtx;
CheckLockCheck<std::optional<event>> MInOrderExternalEvent;

public:
// Queue constructed with the discard_events property
Expand All @@ -1068,9 +1101,9 @@ class queue_impl {
unsigned long long MQueueID;
static std::atomic<unsigned long long> MNextAvailableQueueID;

std::deque<std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>>
MMissedCleanupRequests;
std::mutex MMissedCleanupRequestsMtx;
using MissedCleanupRequestsType = std::deque<
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>>;
CheckLockCheck<MissedCleanupRequestsType> MMissedCleanupRequests;

friend class sycl::ext::oneapi::experimental::detail::node_impl;

Expand Down
Loading