From 5dfc4573204069cd5776649181516b5a98125377 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Fri, 21 Mar 2025 15:31:44 +0100 Subject: [PATCH 01/10] [SYCL] Do not lock unconditionally while access queue_iml members queue_impl::MMissedCleanupRequests and queue_impl::MInOrderExternalEvent are empty on hot path, check a flag instead or before the locking. --- sycl/source/detail/queue_impl.cpp | 4 +++- sycl/source/detail/queue_impl.hpp | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index ee091249c5501..036abe79f15c6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -618,8 +618,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { WeakEvents.swap(MEventsWeak); SharedEvents.swap(MEventsShared); - { + if (MAreCleanupRequestsMissed.load(std::memory_order_acquire)) { std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + MAreCleanupRequestsMissed.store(false, std::memory_order_release); for (auto &UpdatedGraph : MMissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); MMissedCleanupRequests.clear(); @@ -801,6 +802,7 @@ void queue_impl::revisitUnenqueuedCommandsState( else { std::lock_guard RequestLock(MMissedCleanupRequestsMtx); MMissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); + MAreCleanupRequestsMissed.store(true, std::memory_order_release); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 32d859cfa4c1e..16b82f86ae3aa 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -711,13 +711,18 @@ class queue_impl { void setExternalEvent(const event &Event) { std::lock_guard Lock(MInOrderExternalEventMtx); + MHasValueInOrderExternalEvent.store(true, std::memory_order_release); MInOrderExternalEvent = Event; } std::optional popExternalEvent() { - std::lock_guard Lock(MInOrderExternalEventMtx); std::optional Result = std::nullopt; - std::swap(Result, MInOrderExternalEvent); + + if (MHasValueInOrderExternalEvent.load(std::memory_order_acquire)) { + std::lock_guard Lock(MInOrderExternalEventMtx); + MHasValueInOrderExternalEvent.store(false, std::memory_order_release); + std::swap(Result, MInOrderExternalEvent); + } return Result; } @@ -833,8 +838,10 @@ class queue_impl { // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. + if (MAreCleanupRequestsMissed.load(std::memory_order_acquire)) { std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + MAreCleanupRequestsMissed.store(false, std::memory_order_release); for (auto &UpdatedGraph : MMissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); MMissedCleanupRequests.clear(); @@ -850,12 +857,12 @@ class queue_impl { auto EventRet = Handler.finalize(); EventImplPtr EventRetImpl = getSyclObjImpl(EventRet); if (Type == CGType::CodeplayHostTask) - Deps.UnenqueuedCmdEvents.push_back(EventRetImpl); + Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl)); else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) { - Deps.LastBarrier = EventRetImpl; + Deps.LastBarrier = std::move(EventRetImpl); Deps.UnenqueuedCmdEvents.clear(); } else if (!EventRetImpl->isEnqueued()) { - Deps.UnenqueuedCmdEvents.push_back(EventRetImpl); + Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl)); } return EventRet; @@ -1040,6 +1047,8 @@ class queue_impl { // the fallback implementation of profiling info bool MFallbackProfiling = false; + // Is value presented in MInOrderExternalEvent? + std::atomic_bool MHasValueInOrderExternalEvent = false; // This event can be optionally provided by users for in-order queues to add // an additional dependency for the subsequent submission in to the queue. // Access to the event should be guarded with MInOrderExternalEventMtx. @@ -1071,6 +1080,7 @@ class queue_impl { std::deque> MMissedCleanupRequests; std::mutex MMissedCleanupRequestsMtx; + std::atomic_bool MAreCleanupRequestsMissed = false; friend class sycl::ext::oneapi::experimental::detail::node_impl; From 2af3dec7a51ffa1d6eab3a6eb90ee236d9e8336c Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 1 Apr 2025 12:55:58 +0200 Subject: [PATCH 02/10] Move check-lock-check logic to a separate template. --- sycl/source/detail/queue_impl.cpp | 30 +++++++------- sycl/source/detail/queue_impl.hpp | 69 ++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 39 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 036abe79f15c6..5c1e9f8ca25fe 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -284,13 +284,13 @@ event queue_impl::memcpyFromDeviceGlobal( } sycl::detail::optional queue_impl::getLastEvent() { - { - // The external event is required to finish last if set, so it is considered - // the last event if present. - std::lock_guard Lock(MInOrderExternalEventMtx); - if (MInOrderExternalEvent) - return *MInOrderExternalEvent; - } + // The external event is required to finish last if set, so it is considered + // the last event if present. + if (std::optional ExternalEvent = + MInOrderExternalEvent.read([](std::optional &InOrderExternalEvent) { + return InOrderExternalEvent; + })) + return ExternalEvent; std::lock_guard Lock{MMutex}; if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr) @@ -618,13 +618,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { WeakEvents.swap(MEventsWeak); SharedEvents.swap(MEventsShared); - if (MAreCleanupRequestsMissed.load(std::memory_order_acquire)) { - std::lock_guard RequestLock(MMissedCleanupRequestsMtx); - MAreCleanupRequestsMissed.store(false, std::memory_order_release); - for (auto &UpdatedGraph : MMissedCleanupRequests) + MMissedCleanupRequests.get([this](MissedCleanupRequestsType &MissedCleanupRequests){ + for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); - MMissedCleanupRequests.clear(); - } + MissedCleanupRequests.clear(); + }); } // If the queue is either a host one or does not support OOO (and we use // multiple in-order queues as a result of that), wait for each event @@ -800,9 +798,9 @@ void queue_impl::revisitUnenqueuedCommandsState( if (Lock.owns_lock()) doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); else { - std::lock_guard RequestLock(MMissedCleanupRequestsMtx); - MMissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); - MAreCleanupRequestsMissed.store(true, std::memory_order_release); + MMissedCleanupRequests.put([CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests){ + MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); + }); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 16b82f86ae3aa..12d49ad10f020 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -710,19 +710,17 @@ class queue_impl { void *getTraceEvent() { return MTraceEvent; } void setExternalEvent(const event &Event) { - std::lock_guard Lock(MInOrderExternalEventMtx); - MHasValueInOrderExternalEvent.store(true, std::memory_order_release); - MInOrderExternalEvent = Event; + MInOrderExternalEvent.put([&Event](std::optional &InOrderExternalEvent){ + InOrderExternalEvent = Event; + }); } std::optional popExternalEvent() { std::optional Result = std::nullopt; - if (MHasValueInOrderExternalEvent.load(std::memory_order_acquire)) { - std::lock_guard Lock(MInOrderExternalEventMtx); - MHasValueInOrderExternalEvent.store(false, std::memory_order_release); - std::swap(Result, MInOrderExternalEvent); - } + MInOrderExternalEvent.get([&Result](std::optional &InOrderExternalEvent) { + std::swap(Result, InOrderExternalEvent); + }); return Result; } @@ -838,14 +836,11 @@ class queue_impl { // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - if (MAreCleanupRequestsMissed.load(std::memory_order_acquire)) - { - std::lock_guard RequestLock(MMissedCleanupRequestsMtx); - MAreCleanupRequestsMissed.store(false, std::memory_order_release); - for (auto &UpdatedGraph : MMissedCleanupRequests) + MMissedCleanupRequests.get([this](MissedCleanupRequestsType &MissedCleanupRequests){ + for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); - MMissedCleanupRequests.clear(); - } + MissedCleanupRequests.clear(); + }); auto &Deps = MGraph.expired() ? MDefaultGraphDeps : MExtGraphDeps; if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); @@ -1029,6 +1024,38 @@ class queue_impl { } } MDefaultGraphDeps, MExtGraphDeps; + // implement check-lock-check pattern to not lock empty MData + template + class CheckLockCheck { + DataType MData; + std::atomic_bool MDataPresent = false; + mutable std::mutex MDataMtx; + public: + template + void put(F &&func) { + std::lock_guard Lock(MDataMtx); + MDataPresent.store(true, std::memory_order_release); + func(MData); + } + template + void get(F &&func) { + if (MDataPresent.load(std::memory_order_acquire)) { + std::lock_guard Lock(MDataMtx); + if (MDataPresent.load(std::memory_order_acquire)) { + func(MData); + MDataPresent.store(false, std::memory_order_release); + } + } + } + template + DataType read(F &&func) { + if (!MDataPresent.load(std::memory_order_acquire)) + return DataType{}; + std::lock_guard Lock(MDataMtx); + return func(MData); + } + }; + const bool MIsInorder; std::vector MStreamsServiceEvents; @@ -1047,14 +1074,11 @@ class queue_impl { // the fallback implementation of profiling info bool MFallbackProfiling = false; - // Is value presented in MInOrderExternalEvent? - std::atomic_bool MHasValueInOrderExternalEvent = false; // This event can be optionally provided by users for in-order queues to add // an additional dependency for the subsequent submission in to the queue. // Access to the event should be guarded with MInOrderExternalEventMtx. // NOTE: std::optional must not be exposed in the ABI. - std::optional MInOrderExternalEvent; - mutable std::mutex MInOrderExternalEventMtx; + CheckLockCheck> MInOrderExternalEvent; public: // Queue constructed with the discard_events property @@ -1077,10 +1101,9 @@ class queue_impl { unsigned long long MQueueID; static std::atomic MNextAvailableQueueID; - std::deque> - MMissedCleanupRequests; - std::mutex MMissedCleanupRequestsMtx; - std::atomic_bool MAreCleanupRequestsMissed = false; + using MissedCleanupRequestsType = + std::deque>; + CheckLockCheck MMissedCleanupRequests; friend class sycl::ext::oneapi::experimental::detail::node_impl; From 32a8487b521cf9d2c8caf8bf1ea1eed061803456 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 1 Apr 2025 13:11:48 +0200 Subject: [PATCH 03/10] Code formatting. --- sycl/source/detail/queue_impl.cpp | 26 ++++++++++--------- sycl/source/detail/queue_impl.hpp | 42 +++++++++++++++---------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5c1e9f8ca25fe..03af380300749 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -286,10 +286,10 @@ event queue_impl::memcpyFromDeviceGlobal( sycl::detail::optional queue_impl::getLastEvent() { // The external event is required to finish last if set, so it is considered // the last event if present. - if (std::optional ExternalEvent = - MInOrderExternalEvent.read([](std::optional &InOrderExternalEvent) { - return InOrderExternalEvent; - })) + if (std::optional ExternalEvent = MInOrderExternalEvent.read( + [](std::optional &InOrderExternalEvent) { + return InOrderExternalEvent; + })) return ExternalEvent; std::lock_guard Lock{MMutex}; @@ -618,11 +618,12 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { WeakEvents.swap(MEventsWeak); SharedEvents.swap(MEventsShared); - MMissedCleanupRequests.get([this](MissedCleanupRequestsType &MissedCleanupRequests){ - for (auto &UpdatedGraph : MissedCleanupRequests) - doUnenqueuedCommandCleanup(UpdatedGraph); - MissedCleanupRequests.clear(); - }); + MMissedCleanupRequests.get( + [this](MissedCleanupRequestsType &MissedCleanupRequests){ + for (auto &UpdatedGraph : MissedCleanupRequests) + doUnenqueuedCommandCleanup(UpdatedGraph); + MissedCleanupRequests.clear(); + }); } // If the queue is either a host one or does not support OOO (and we use // multiple in-order queues as a result of that), wait for each event @@ -798,9 +799,10 @@ void queue_impl::revisitUnenqueuedCommandsState( if (Lock.owns_lock()) doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); else { - MMissedCleanupRequests.put([CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests){ - MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); - }); + MMissedCleanupRequests.put( + [CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests){ + MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); + }); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 12d49ad10f020..411038177f11b 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -710,17 +710,19 @@ class queue_impl { void *getTraceEvent() { return MTraceEvent; } void setExternalEvent(const event &Event) { - MInOrderExternalEvent.put([&Event](std::optional &InOrderExternalEvent){ - InOrderExternalEvent = Event; - }); + MInOrderExternalEvent.put( + [&Event](std::optional &InOrderExternalEvent){ + InOrderExternalEvent = Event; + }); } std::optional popExternalEvent() { std::optional Result = std::nullopt; - MInOrderExternalEvent.get([&Result](std::optional &InOrderExternalEvent) { - std::swap(Result, InOrderExternalEvent); - }); + MInOrderExternalEvent.get( + [&Result](std::optional &InOrderExternalEvent) { + std::swap(Result, InOrderExternalEvent); + }); return Result; } @@ -836,11 +838,12 @@ class queue_impl { // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - MMissedCleanupRequests.get([this](MissedCleanupRequestsType &MissedCleanupRequests){ - for (auto &UpdatedGraph : MissedCleanupRequests) - doUnenqueuedCommandCleanup(UpdatedGraph); - MissedCleanupRequests.clear(); - }); + MMissedCleanupRequests.get( + [this](MissedCleanupRequestsType &MissedCleanupRequests){ + for (auto &UpdatedGraph : MissedCleanupRequests) + doUnenqueuedCommandCleanup(UpdatedGraph); + MissedCleanupRequests.clear(); + }); auto &Deps = MGraph.expired() ? MDefaultGraphDeps : MExtGraphDeps; if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); @@ -1025,20 +1028,18 @@ class queue_impl { } MDefaultGraphDeps, MExtGraphDeps; // implement check-lock-check pattern to not lock empty MData - template - class CheckLockCheck { + template class CheckLockCheck { DataType MData; std::atomic_bool MDataPresent = false; mutable std::mutex MDataMtx; + public: - template - void put(F &&func) { + template void put(F &&func) { std::lock_guard Lock(MDataMtx); MDataPresent.store(true, std::memory_order_release); func(MData); } - template - void get(F &&func) { + template void get(F &&func) { if (MDataPresent.load(std::memory_order_acquire)) { std::lock_guard Lock(MDataMtx); if (MDataPresent.load(std::memory_order_acquire)) { @@ -1047,8 +1048,7 @@ class queue_impl { } } } - template - DataType read(F &&func) { + template DataType read(F &&func) { if (!MDataPresent.load(std::memory_order_acquire)) return DataType{}; std::lock_guard Lock(MDataMtx); @@ -1101,8 +1101,8 @@ class queue_impl { unsigned long long MQueueID; static std::atomic MNextAvailableQueueID; - using MissedCleanupRequestsType = - std::deque>; + using MissedCleanupRequestsType = std::deque< + std::shared_ptr>; CheckLockCheck MMissedCleanupRequests; friend class sycl::ext::oneapi::experimental::detail::node_impl; From 4bd575c78d9eeea57c5862f2beeeff95c584ee6d Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 1 Apr 2025 14:04:57 +0200 Subject: [PATCH 04/10] Code formatting. --- sycl/source/detail/queue_impl.cpp | 4 ++-- sycl/source/detail/queue_impl.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 03af380300749..f54a7598e6316 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -619,7 +619,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { SharedEvents.swap(MEventsShared); MMissedCleanupRequests.get( - [this](MissedCleanupRequestsType &MissedCleanupRequests){ + [this](MissedCleanupRequestsType &MissedCleanupRequests) { for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); MissedCleanupRequests.clear(); @@ -800,7 +800,7 @@ void queue_impl::revisitUnenqueuedCommandsState( doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); else { MMissedCleanupRequests.put( - [CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests){ + [CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests) { MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); }); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 411038177f11b..c997b5b4a64c0 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -711,7 +711,7 @@ class queue_impl { void setExternalEvent(const event &Event) { MInOrderExternalEvent.put( - [&Event](std::optional &InOrderExternalEvent){ + [&Event](std::optional &InOrderExternalEvent) { InOrderExternalEvent = Event; }); } @@ -839,7 +839,7 @@ class queue_impl { // (blocked), we track them to prevent barrier from being enqueued // earlier. MMissedCleanupRequests.get( - [this](MissedCleanupRequestsType &MissedCleanupRequests){ + [this](MissedCleanupRequestsType &MissedCleanupRequests) { for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); MissedCleanupRequests.clear(); From 2b1b35f14c14d18239a598b1a0495e581c6bad8f Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 2 Apr 2025 18:43:06 +0200 Subject: [PATCH 05/10] Rename put/get to push/pop. Change args passing in lambda functions. --- sycl/source/detail/queue_impl.cpp | 8 ++++---- sycl/source/detail/queue_impl.hpp | 25 ++++++++++++------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index f54a7598e6316..1e13ad476710b 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -618,8 +618,8 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { WeakEvents.swap(MEventsWeak); SharedEvents.swap(MEventsShared); - MMissedCleanupRequests.get( - [this](MissedCleanupRequestsType &MissedCleanupRequests) { + MMissedCleanupRequests.pop( + [&](MissedCleanupRequestsType &MissedCleanupRequests) { for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); MissedCleanupRequests.clear(); @@ -799,8 +799,8 @@ void queue_impl::revisitUnenqueuedCommandsState( if (Lock.owns_lock()) doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); else { - MMissedCleanupRequests.put( - [CompletedHostTask](MissedCleanupRequestsType &MissedCleanupRequests) { + MMissedCleanupRequests.push( + [&](MissedCleanupRequestsType &MissedCleanupRequests) { MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); }); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c997b5b4a64c0..db9adabd91c1a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -710,8 +710,8 @@ class queue_impl { void *getTraceEvent() { return MTraceEvent; } void setExternalEvent(const event &Event) { - MInOrderExternalEvent.put( - [&Event](std::optional &InOrderExternalEvent) { + MInOrderExternalEvent.push( + [&](std::optional &InOrderExternalEvent) { InOrderExternalEvent = Event; }); } @@ -719,8 +719,8 @@ class queue_impl { std::optional popExternalEvent() { std::optional Result = std::nullopt; - MInOrderExternalEvent.get( - [&Result](std::optional &InOrderExternalEvent) { + MInOrderExternalEvent.pop( + [&](std::optional &InOrderExternalEvent) { std::swap(Result, InOrderExternalEvent); }); return Result; @@ -838,8 +838,8 @@ class queue_impl { // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - MMissedCleanupRequests.get( - [this](MissedCleanupRequestsType &MissedCleanupRequests) { + MMissedCleanupRequests.pop( + [&](MissedCleanupRequestsType &MissedCleanupRequests) { for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); MissedCleanupRequests.clear(); @@ -853,14 +853,13 @@ class queue_impl { Handler.depends_on(Deps.LastBarrier); auto EventRet = Handler.finalize(); - EventImplPtr EventRetImpl = getSyclObjImpl(EventRet); if (Type == CGType::CodeplayHostTask) - Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl)); + Deps.UnenqueuedCmdEvents.push_back(getSyclObjImpl(EventRet)); else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) { - Deps.LastBarrier = std::move(EventRetImpl); + Deps.LastBarrier = getSyclObjImpl(EventRet); Deps.UnenqueuedCmdEvents.clear(); - } else if (!EventRetImpl->isEnqueued()) { - Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl)); + } else if (!getSyclObjImpl(EventRet)->isEnqueued()) { + Deps.UnenqueuedCmdEvents.push_back(getSyclObjImpl(EventRet)); } return EventRet; @@ -1034,12 +1033,12 @@ class queue_impl { mutable std::mutex MDataMtx; public: - template void put(F &&func) { + template void push(F &&func) { std::lock_guard Lock(MDataMtx); MDataPresent.store(true, std::memory_order_release); func(MData); } - template void get(F &&func) { + template void pop(F &&func) { if (MDataPresent.load(std::memory_order_acquire)) { std::lock_guard Lock(MDataMtx); if (MDataPresent.load(std::memory_order_acquire)) { From ed1c0c42126bb504dad39ab00d7f6f4b1e7e6625 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 2 Apr 2025 19:12:30 +0200 Subject: [PATCH 06/10] Code formatting. --- sycl/source/detail/queue_impl.hpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index fb8ad38fd9a0a..ef3928832a36e 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -710,19 +710,17 @@ class queue_impl { void *getTraceEvent() { return MTraceEvent; } void setExternalEvent(const event &Event) { - MInOrderExternalEvent.push( - [&](std::optional &InOrderExternalEvent) { - InOrderExternalEvent = Event; - }); + MInOrderExternalEvent.push([&](std::optional &InOrderExternalEvent) { + InOrderExternalEvent = Event; + }); } std::optional popExternalEvent() { std::optional Result = std::nullopt; - MInOrderExternalEvent.pop( - [&](std::optional &InOrderExternalEvent) { - std::swap(Result, InOrderExternalEvent); - }); + MInOrderExternalEvent.pop([&](std::optional &InOrderExternalEvent) { + std::swap(Result, InOrderExternalEvent); + }); return Result; } From 647a8fb7f018ce386e33762cda7fa0def6f76758 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 3 Apr 2025 07:32:57 -0700 Subject: [PATCH 07/10] Code review (#1) * Rename to `set`/`unset` * Simplify `read` * Be pedantic * Update comment --- sycl/source/detail/queue_impl.cpp | 9 +++------ sycl/source/detail/queue_impl.hpp | 33 ++++++++++++++++--------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 07239041c0c6c..75f524f409cfc 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -286,10 +286,7 @@ event queue_impl::memcpyFromDeviceGlobal( sycl::detail::optional queue_impl::getLastEvent() { // The external event is required to finish last if set, so it is considered // the last event if present. - if (std::optional ExternalEvent = MInOrderExternalEvent.read( - [](std::optional &InOrderExternalEvent) { - return InOrderExternalEvent; - })) + if (std::optional ExternalEvent = MInOrderExternalEvent.read()) return ExternalEvent; std::lock_guard Lock{MMutex}; @@ -618,7 +615,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { WeakEvents.swap(MEventsWeak); SharedEvents.swap(MEventsShared); - MMissedCleanupRequests.pop( + MMissedCleanupRequests.unset( [&](MissedCleanupRequestsType &MissedCleanupRequests) { for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); @@ -800,7 +797,7 @@ void queue_impl::revisitUnenqueuedCommandsState( if (Lock.owns_lock()) doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); else { - MMissedCleanupRequests.push( + MMissedCleanupRequests.set( [&](MissedCleanupRequestsType &MissedCleanupRequests) { MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); }); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ef3928832a36e..e02aca36478b1 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -710,7 +710,7 @@ class queue_impl { void *getTraceEvent() { return MTraceEvent; } void setExternalEvent(const event &Event) { - MInOrderExternalEvent.push([&](std::optional &InOrderExternalEvent) { + MInOrderExternalEvent.set([&](std::optional &InOrderExternalEvent) { InOrderExternalEvent = Event; }); } @@ -718,7 +718,7 @@ class queue_impl { std::optional popExternalEvent() { std::optional Result = std::nullopt; - MInOrderExternalEvent.pop([&](std::optional &InOrderExternalEvent) { + MInOrderExternalEvent.unset([&](std::optional &InOrderExternalEvent) { std::swap(Result, InOrderExternalEvent); }); return Result; @@ -841,7 +841,7 @@ class queue_impl { // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - MMissedCleanupRequests.pop( + MMissedCleanupRequests.unset( [&](MissedCleanupRequestsType &MissedCleanupRequests) { for (auto &UpdatedGraph : MissedCleanupRequests) doUnenqueuedCommandCleanup(UpdatedGraph); @@ -1030,32 +1030,33 @@ class queue_impl { } } MDefaultGraphDeps, MExtGraphDeps; - // implement check-lock-check pattern to not lock empty MData + // Implement check-lock-check pattern to not lock empty MData as the locks + // come with runtime overhead. template class CheckLockCheck { DataType MData; - std::atomic_bool MDataPresent = false; + std::atomic_bool MIsSet = false; mutable std::mutex MDataMtx; public: - template void push(F &&func) { + template void set(F &&func) { std::lock_guard Lock(MDataMtx); - MDataPresent.store(true, std::memory_order_release); - func(MData); + MIsSet.store(true, std::memory_order_release); + std::forward(func)(MData); } - template void pop(F &&func) { - if (MDataPresent.load(std::memory_order_acquire)) { + template void unset(F &&func) { + if (MIsSet.load(std::memory_order_acquire)) { std::lock_guard Lock(MDataMtx); - if (MDataPresent.load(std::memory_order_acquire)) { - func(MData); - MDataPresent.store(false, std::memory_order_release); + if (MIsSet.load(std::memory_order_acquire)) { + std::forward(func)(MData); + MIsSet.store(false, std::memory_order_release); } } } - template DataType read(F &&func) { - if (!MDataPresent.load(std::memory_order_acquire)) + DataType read() { + if (!MIsSet.load(std::memory_order_acquire)) return DataType{}; std::lock_guard Lock(MDataMtx); - return func(MData); + return MData; } }; From 5b3a159c0bef31d35d8e1b3be6d7ae5d6db20062 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Thu, 3 Apr 2025 16:50:08 +0200 Subject: [PATCH 08/10] Code formatting. --- sycl/source/detail/queue_impl.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index e02aca36478b1..24e767ef10d91 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -718,9 +718,10 @@ class queue_impl { std::optional popExternalEvent() { std::optional Result = std::nullopt; - MInOrderExternalEvent.unset([&](std::optional &InOrderExternalEvent) { - std::swap(Result, InOrderExternalEvent); - }); + MInOrderExternalEvent.unset( + [&](std::optional &InOrderExternalEvent) { + std::swap(Result, InOrderExternalEvent); + }); return Result; } From ec7cce0c21af8768ccce0730eb618bbe756f482c Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Mon, 7 Apr 2025 14:12:25 +0200 Subject: [PATCH 09/10] Revert changes related to MMissedCleanupRequests. --- sycl/source/detail/queue_impl.cpp | 18 ++++++++---------- sycl/source/detail/queue_impl.hpp | 18 +++++++++--------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 75f524f409cfc..b6ba59bdf7ed7 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -615,12 +615,12 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { WeakEvents.swap(MEventsWeak); SharedEvents.swap(MEventsShared); - MMissedCleanupRequests.unset( - [&](MissedCleanupRequestsType &MissedCleanupRequests) { - for (auto &UpdatedGraph : MissedCleanupRequests) - doUnenqueuedCommandCleanup(UpdatedGraph); - MissedCleanupRequests.clear(); - }); + { + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + for (auto &UpdatedGraph : MMissedCleanupRequests) + doUnenqueuedCommandCleanup(UpdatedGraph); + MMissedCleanupRequests.clear(); + } } // If the queue is either a host one or does not support OOO (and we use // multiple in-order queues as a result of that), wait for each event @@ -797,10 +797,8 @@ void queue_impl::revisitUnenqueuedCommandsState( if (Lock.owns_lock()) doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); else { - MMissedCleanupRequests.set( - [&](MissedCleanupRequestsType &MissedCleanupRequests) { - MissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); - }); + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + MMissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index f749ef6dfde89..844c8bdadcb9e 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -842,12 +842,12 @@ class queue_impl { // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - MMissedCleanupRequests.unset( - [&](MissedCleanupRequestsType &MissedCleanupRequests) { - for (auto &UpdatedGraph : MissedCleanupRequests) - doUnenqueuedCommandCleanup(UpdatedGraph); - MissedCleanupRequests.clear(); - }); + { + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + for (auto &UpdatedGraph : MMissedCleanupRequests) + doUnenqueuedCommandCleanup(UpdatedGraph); + MMissedCleanupRequests.clear(); + } auto &Deps = MGraph.expired() ? MDefaultGraphDeps : MExtGraphDeps; if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); @@ -1104,9 +1104,9 @@ class queue_impl { unsigned long long MQueueID; static std::atomic MNextAvailableQueueID; - using MissedCleanupRequestsType = std::deque< - std::shared_ptr>; - CheckLockCheck MMissedCleanupRequests; + std::deque> + MMissedCleanupRequests; + std::mutex MMissedCleanupRequestsMtx; friend class sycl::ext::oneapi::experimental::detail::node_impl; From 3b247207a4647c5cef8f9125bea63ac820a9620e Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Mon, 7 Apr 2025 14:29:40 +0200 Subject: [PATCH 10/10] Fix comment. --- sycl/source/detail/queue_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 844c8bdadcb9e..b4725ae24a111 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -1079,7 +1079,7 @@ class queue_impl { // This event can be optionally provided by users for in-order queues to add // an additional dependency for the subsequent submission in to the queue. - // Access to the event should be guarded with MInOrderExternalEventMtx. + // Access to the event should be guarded with mutex. // NOTE: std::optional must not be exposed in the ABI. CheckLockCheck> MInOrderExternalEvent;