diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 7ecf5ce4c8b14..6e49b6e549c6e 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -89,10 +89,18 @@ template struct LaunchConfigAccess { } }; -template -void submit_impl(queue &Q, CommandGroupFunc &&CGF, +template +void submit_impl(queue &Q, PropertiesT Props, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc) { - Q.submit_without_event(std::forward(CGF), CodeLoc); + Q.submit_without_event(Props, std::forward(CGF), CodeLoc); +} + +template +event submit_with_event_impl(queue &Q, PropertiesT Props, + CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc) { + return Q.submit_with_event(Props, std::forward(CGF), + nullptr, CodeLoc); } } // namespace detail @@ -100,9 +108,8 @@ template void submit(queue Q, PropertiesT Props, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { - std::ignore = Props; sycl::ext::oneapi::experimental::detail::submit_impl( - Q, std::forward(CGF), CodeLoc); + Q, Props, std::forward(CGF), CodeLoc); } template @@ -116,8 +123,8 @@ template event submit_with_event(queue Q, PropertiesT Props, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { - std::ignore = Props; - return Q.submit(std::forward(CGF), CodeLoc); + return sycl::ext::oneapi::experimental::detail::submit_with_event_impl( + Q, Props, std::forward(CGF), CodeLoc); } template diff --git a/sycl/include/sycl/ext/oneapi/experimental/event_mode_property.hpp b/sycl/include/sycl/ext/oneapi/experimental/event_mode_property.hpp new file mode 100644 index 0000000000000..dc9967fe24103 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/event_mode_property.hpp @@ -0,0 +1,37 @@ +//==-- cluster_group_prop.hpp --- SYCL extension for event mode property ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class event_mode_enum { none, low_power }; + +struct event_mode + : detail::run_time_property_key { + event_mode(event_mode_enum mode) : value(mode) {} + + event_mode_enum value; +}; + +using event_mode_key = event_mode; + +inline bool operator==(const event_mode &lhs, const event_mode &rhs) { + return lhs.value == rhs.value; +} +inline bool operator!=(const event_mode &lhs, const event_mode &rhs) { + return !(lhs == rhs); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index c5cec5d5639a3..715a3e8b83252 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -223,8 +223,9 @@ enum PropKind : uint32_t { InitializeToIdentity = 78, WorkGroupScratchSize = 79, Restrict = 80, + EventMode = 81, // PropKindSize must always be the last value. - PropKindSize = 81, + PropKindSize = 82, }; template struct PropertyToKind { diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index d34439133ca76..2d0967630f9e3 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -30,15 +30,16 @@ #include // for defaultAsyncHa... #include // for device_global #include // for device_image_s... -#include // for command_graph... -#include // for empty_properti... -#include // for handler, isDev... -#include // for id -#include // for auto_name -#include // for kernel_handler -#include // for nd_range -#include // for property_list -#include // for range +#include +#include // for command_graph... +#include // for empty_properti... +#include // for handler, isDev... +#include // for id +#include // for auto_name +#include // for kernel_handler +#include // for nd_range +#include // for property_list +#include // for range #include // for size_t #include // for function @@ -99,6 +100,9 @@ class __SYCL_EXPORT SubmissionInfo { std::shared_ptr &SecondaryQueue(); const std::shared_ptr &SecondaryQueue() const; + ext::oneapi::experimental::event_mode_enum &EventMode(); + const ext::oneapi::experimental::event_mode_enum &EventMode() const; + private: std::shared_ptr impl = nullptr; }; @@ -111,9 +115,14 @@ enum class queue_state { executing, recording }; struct image_descriptor; namespace detail { -template -void submit_impl(queue &Q, CommandGroupFunc &&CGF, +template +void submit_impl(queue &Q, PropertiesT Props, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc); + +template +event submit_with_event_impl(queue &Q, PropertiesT Props, + CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc); } // namespace detail } // namespace ext::oneapi::experimental @@ -366,7 +375,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { std::enable_if_t, event> submit( T CGF, const detail::code_location &CodeLoc = detail::code_location::current()) { - return submit_with_event(CGF, /*SecondaryQueuePtr=*/nullptr, CodeLoc); + return submit_with_event( + sycl::ext::oneapi::experimental::empty_properties_t{}, CGF, + /*SecondaryQueuePtr=*/nullptr, CodeLoc); } /// Submits a command group function object to the queue, in order to be @@ -384,7 +395,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { std::enable_if_t, event> submit( T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc = detail::code_location::current()) { - return submit_with_event(CGF, &SecondaryQueue, CodeLoc); + return submit_with_event( + sycl::ext::oneapi::experimental::empty_properties_t{}, CGF, + &SecondaryQueue, CodeLoc); } /// Prevents any commands submitted afterward to this queue from executing @@ -2747,11 +2760,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &); #endif - template + template friend void ext::oneapi::experimental::detail::submit_impl( - queue &Q, CommandGroupFunc &&CGF, + queue &Q, PropertiesT Props, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc); + template + friend event ext::oneapi::experimental::detail::submit_with_event_impl( + queue &Q, PropertiesT Props, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc); + + template + void ProcessSubmitProperties(PropertiesT Props, detail::SubmissionInfo &SI) { + if constexpr (Props.template has_property< + ext::oneapi::experimental::event_mode_key>()) { + ext::oneapi::experimental::event_mode EventModeProp = + Props.template get_property(); + if (EventModeProp.value != + ext::oneapi::experimental::event_mode_enum::none) + SI.EventMode() = EventModeProp.value; + } + } + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES /// TODO: Unused. Remove these when ABI-break window is open. event submit_impl(std::function CGH, @@ -2800,16 +2830,18 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// + /// \param Props is a property list with submission properties. /// \param CGF is a function object containing command group. /// \param CodeLoc is the code location of the submit call (default argument) /// \return a SYCL event object for the submitted command group. - template + template std::enable_if_t, event> submit_with_event( - T CGF, queue *SecondaryQueuePtr, + PropertiesT Props, T CGF, queue *SecondaryQueuePtr, const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); detail::SubmissionInfo SI{}; + ProcessSubmitProperties(Props, SI); if (SecondaryQueuePtr) SI.SecondaryQueue() = detail::getSyclObjImpl(*SecondaryQueuePtr); #if __SYCL_USE_FALLBACK_ASSERT @@ -2834,18 +2866,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// + /// \param Props is a property list with submission properties. /// \param CGF is a function object containing command group. /// \param CodeLoc is the code location of the submit call (default argument) - template + template std::enable_if_t, void> - submit_without_event(T CGF, const detail::code_location &CodeLoc) { + submit_without_event(PropertiesT Props, T CGF, + const detail::code_location &CodeLoc) { #if __SYCL_USE_FALLBACK_ASSERT // If post-processing is needed, fall back to the regular submit. // TODO: Revisit whether we can avoid this. - submit_with_event(CGF, nullptr, CodeLoc); + submit_with_event(Props, CGF, nullptr, CodeLoc); #else detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); detail::SubmissionInfo SI{}; + ProcessSubmitProperties(Props, SI); submit_without_event_impl(CGF, SI, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); #endif // __SYCL_USE_FALLBACK_ASSERT @@ -3072,8 +3107,12 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, }); }; - CopierEv = Self.submit_with_event(CopierCGF, SecondaryQueue, CodeLoc); - CheckerEv = Self.submit_with_event(CheckerCGF, SecondaryQueue, CodeLoc); + CopierEv = Self.submit_with_event( + sycl::ext::oneapi::experimental::empty_properties_t{}, CopierCGF, + SecondaryQueue, CodeLoc); + CheckerEv = Self.submit_with_event( + sycl::ext::oneapi::experimental::empty_properties_t{}, CheckerCGF, + SecondaryQueue, CodeLoc); return CheckerEv; } diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 5f08485159187..8d3a5e56ac71a 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -15,8 +15,9 @@ #include // for ur_rect_region_t, ur_rect_offset_t #include // for event_impl #include // for queue_impl -#include // for kernel_impl -#include // for kernel_bundle_impl +#include +#include // for kernel_impl +#include // for kernel_bundle_impl #include // for assert #include // for shared_ptr, unique_ptr @@ -425,12 +426,16 @@ class CGAdviseUSM : public CG { class CGBarrier : public CG { public: std::vector MEventsWaitWithBarrier; + ext::oneapi::experimental::event_mode_enum MEventMode = + ext::oneapi::experimental::event_mode_enum::none; CGBarrier(std::vector EventsWaitWithBarrier, + ext::oneapi::experimental::event_mode_enum EventMode, CG::StorageInitHelper CGData, CGType Type, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), - MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {} + MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)), + MEventMode(EventMode) {} }; class CGProfilingTag : public CG { diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index cf776bcbc2cc3..92d8f2aed975c 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -196,7 +196,12 @@ class handler_impl { bool MIsTopCodeLoc = true; /// List of work group memory objects associated with this handler - std::vector> MWorkGroupMemoryObjects; + std::vector> + MWorkGroupMemoryObjects; + + /// Potential event mode for the result event of the command. + ext::oneapi::experimental::event_mode_enum MEventMode = + ext::oneapi::experimental::event_mode_enum::none; }; } // namespace detail diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index a6f1559cd8b2f..88374d3289c20 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -358,6 +358,7 @@ event queue_impl::submit_impl(const std::function &CGF, bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo) { handler Handler(Self, PrimaryQueue, SecondaryQueue, CallerNeedsEvent); + auto HandlerImpl = detail::getSyclObjImpl(Handler); Handler.saveCodeLoc(Loc, IsTopCodeLoc); { @@ -368,13 +369,15 @@ event queue_impl::submit_impl(const std::function &CGF, // Scheduler will later omit events, that are not required to execute tasks. // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. - const CGType Type = detail::getSyclObjImpl(Handler)->MCGType; + const CGType Type = HandlerImpl->MCGType; event Event = detail::createSyclObjFromImpl( std::make_shared()); std::vector Streams; if (Type == CGType::Kernel) Streams = std::move(Handler.MStreamStorage); + HandlerImpl->MEventMode = SubmitInfo.EventMode(); + if (SubmitInfo.PostProcessorFunc()) { auto &PostProcess = *SubmitInfo.PostProcessorFunc(); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ee6b795211e6b..0f99f49d1257d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -71,6 +71,8 @@ enum QueueOrder { Ordered, OOO }; struct SubmissionInfoImpl { optional MPostProcessorFunc = std::nullopt; std::shared_ptr MSecondaryQueue = nullptr; + ext::oneapi::experimental::event_mode_enum MEventMode = + ext::oneapi::experimental::event_mode_enum::none; }; class queue_impl { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bf55db9f33909..e71e4f3ded25f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -285,8 +285,9 @@ Command::getUrEvents(const std::vector &EventImpls) const { // solution for the issue that barrier with wait list could not // handle empty ur event handles when kernel is enqueued on host task // completion. -std::vector Command::getUrEventsBlocking( - const std::vector &EventImpls) const { +std::vector +Command::getUrEventsBlocking(const std::vector &EventImpls, + bool HasEventMode) const { std::vector RetUrEvents; for (auto &EventImpl : EventImpls) { // Throwaway events created with empty constructor will not have a context @@ -313,7 +314,11 @@ std::vector Command::getUrEventsBlocking( // At this stage dependency is definitely ur task and need to check if // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. - if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && + // If the resulting event is supposed to have a specific event mode, + // redundant events may still differ from the resulting event, so they are + // kept. + if (!HasEventMode && MWorkerQueue && + EventImpl->getWorkerQueue() == MWorkerQueue && MWorkerQueue->isInOrder() && !isHostTask()) continue; @@ -3431,11 +3436,22 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } case CGType::Barrier: { assert(MQueue && "Barrier submission should have an associated queue"); + CGBarrier *Barrier = static_cast(MCommandGroup.get()); + + // Create properties for the barrier. + ur_exp_enqueue_ext_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES; + Properties.pNext = nullptr; + Properties.flags = 0; + if (Barrier->MEventMode == + ext::oneapi::experimental::event_mode_enum::low_power) + Properties.flags |= UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS; + const AdapterPtr &Adapter = MQueue->getAdapter(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - Adapter->call( - MQueue->getHandleRef(), 0, nullptr, Event); + Adapter->call( + MQueue->getHandleRef(), &Properties, 0, nullptr, Event); SetEventHandleOrDiscard(); return UR_RESULT_SUCCESS; } @@ -3443,16 +3459,30 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { assert(MQueue && "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; - std::vector UrEvents = getUrEventsBlocking(Events); + bool HasEventMode = + Barrier->MEventMode != ext::oneapi::experimental::event_mode_enum::none; + std::vector UrEvents = + getUrEventsBlocking(Events, HasEventMode); if (UrEvents.empty()) { // If Events is empty, then the barrier has no effect. return UR_RESULT_SUCCESS; } + + // Create properties for the barrier. + ur_exp_enqueue_ext_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES; + Properties.pNext = nullptr; + Properties.flags = 0; + if (Barrier->MEventMode == + ext::oneapi::experimental::event_mode_enum::low_power) + Properties.flags |= UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS; + const AdapterPtr &Adapter = MQueue->getAdapter(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - Adapter->call( - MQueue->getHandleRef(), UrEvents.size(), &UrEvents[0], Event); + Adapter->call( + MQueue->getHandleRef(), &Properties, UrEvents.size(), &UrEvents[0], + Event); SetEventHandleOrDiscard(); return UR_RESULT_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index a49f52fbbc436..239cebf521767 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -248,7 +248,8 @@ class Command { /// in order queue. Does blocking enqueue if event is expected to produce ur /// event but has empty native handle. std::vector - getUrEventsBlocking(const std::vector &EventImpls) const; + getUrEventsBlocking(const std::vector &EventImpls, + bool HasEventMode) const; bool isHostTask() const; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 7cdfa2ab48fdb..a8927abf564a2 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -424,9 +424,9 @@ event handler::finalize() { CommandGroup.reset(new detail::CG(detail::CGType::Barrier, std::move(impl->CGData), MCodeLoc)); } else { - CommandGroup.reset( - new detail::CGBarrier(std::move(impl->MEventsWaitWithBarrier), - std::move(impl->CGData), getType(), MCodeLoc)); + CommandGroup.reset(new detail::CGBarrier( + std::move(impl->MEventsWaitWithBarrier), impl->MEventMode, + std::move(impl->CGData), getType(), MCodeLoc)); } break; } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index ac7273081410a..399d67af8afc8 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -40,6 +40,15 @@ const std::shared_ptr & SubmissionInfo::SecondaryQueue() const { return impl->MSecondaryQueue; } + +ext::oneapi::experimental::event_mode_enum &SubmissionInfo::EventMode() { + return impl->MEventMode; +} + +const ext::oneapi::experimental::event_mode_enum & +SubmissionInfo::EventMode() const { + return impl->MEventMode; +} } // namespace detail queue::queue(const context &SyclContext, const device_selector &DeviceSelector, diff --git a/sycl/test-e2e/Adapters/level_zero_barrier_optimization.cpp b/sycl/test-e2e/Adapters/level_zero_barrier_optimization.cpp index 6aabe38701767..7457c14148153 100644 --- a/sycl/test-e2e/Adapters/level_zero_barrier_optimization.cpp +++ b/sycl/test-e2e/Adapters/level_zero_barrier_optimization.cpp @@ -34,7 +34,7 @@ int main() { auto EventB = submitKernel(Q2); // CHECK: Test1 - // CHECK: ---> urEnqueueEventsWaitWithBarrier + // CHECK: ---> urEnqueueEventsWaitWithBarrierExt // CHECK: ZE ---> zeEventCreate // CHECK-OPT: ZE ---> zeCommandListAppendWaitOnEvents // CHECK: ZE ---> zeCommandListAppendSignalEvent @@ -54,7 +54,7 @@ int main() { auto EventB = submitKernel(Q2); // CHECK: Test2 - // CHECK: ---> urEnqueueEventsWaitWithBarrier + // CHECK: ---> urEnqueueEventsWaitWithBarrierExt // CHECK-OPT: ZE ---> {{zeEventCreate|zeEventHostReset}} // CHECK-OPT: ZE ---> zeCommandListAppendWaitOnEvents // CHECK: ZE ---> zeCommandListAppendSignalEvent @@ -74,7 +74,7 @@ int main() { Q2.wait(); Q3.wait(); // CHECK: Test3 - // CHECK: ---> urEnqueueEventsWaitWithBarrier + // CHECK: ---> urEnqueueEventsWaitWithBarrierExt // CHECK: ZE ---> zeEventCreate // CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents // CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent diff --git a/sycl/test-e2e/Adapters/level_zero_batch_barrier.cpp b/sycl/test-e2e/Adapters/level_zero_batch_barrier.cpp index 195378420d474..53549955a368a 100644 --- a/sycl/test-e2e/Adapters/level_zero_batch_barrier.cpp +++ b/sycl/test-e2e/Adapters/level_zero_batch_barrier.cpp @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) { // continue the batch event barrier = q.ext_oneapi_submit_barrier(); - // CHECK: ---> urEnqueueEventsWaitWithBarrier + // CHECK: ---> urEnqueueEventsWaitWithBarrierExt // CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists submit_kernel(q); diff --git a/sycl/test-e2e/EnqueueFunctions/barrier.cpp b/sycl/test-e2e/EnqueueFunctions/barrier.cpp index b58486b8530fe..bdc510bb8e447 100644 --- a/sycl/test-e2e/EnqueueFunctions/barrier.cpp +++ b/sycl/test-e2e/EnqueueFunctions/barrier.cpp @@ -50,5 +50,5 @@ int main() { return 0; } -// CHECK-COUNT-4: <--- urEnqueueEventsWaitWithBarrier -// CHECK-NOT: <--- urEnqueueEventsWaitWithBarrier +// CHECK-COUNT-4: <--- urEnqueueEventsWaitWithBarrierExt +// CHECK-NOT: <--- urEnqueueEventsWaitWithBarrierExt diff --git a/sycl/test-e2e/EventMode/low_power_event_mode.cpp b/sycl/test-e2e/EventMode/low_power_event_mode.cpp new file mode 100644 index 0000000000000..1232580a8bd83 --- /dev/null +++ b/sycl/test-e2e/EventMode/low_power_event_mode.cpp @@ -0,0 +1,28 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Tests that the low-power event mode compiles and executes. Note that the +// event mode is a hint and has no observable behavior, aside from potential +// performance. + +#include +#include +#include + +namespace oneapiext = sycl::ext::oneapi::experimental; + +int main() { + sycl::queue Q; + + oneapiext::properties Props{ + oneapiext::event_mode{oneapiext::event_mode_enum::low_power}}; + + sycl::event E = oneapiext::submit_with_event( + Q, Props, [&](sycl::handler &CGH) { oneapiext::barrier(CGH); }); + + oneapiext::submit_with_event(Q, Props, [&](sycl::handler &CGH) { + oneapiext::partial_barrier(CGH, {E}); + }).wait_and_throw(); + + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 8d27788b92758..76512bcd3a70c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3247,6 +3247,7 @@ _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEERKNS0_7contextE _ZN4sycl3_V16detail14SubmissionInfo14SecondaryQueueEv _ZN4sycl3_V16detail14SubmissionInfo17PostProcessorFuncEv +_ZN4sycl3_V16detail14SubmissionInfo9EventModeEv _ZN4sycl3_V16detail14SubmissionInfoC1Ev _ZN4sycl3_V16detail14SubmissionInfoC2Ev _ZN4sycl3_V16detail14addCounterInitERNS0_7handlerERSt10shared_ptrINS1_10queue_implEERS4_IiE @@ -3723,6 +3724,7 @@ _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv _ZNK4sycl3_V16detail14SubmissionInfo14SecondaryQueueEv _ZNK4sycl3_V16detail14SubmissionInfo17PostProcessorFuncEv +_ZNK4sycl3_V16detail14SubmissionInfo9EventModeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getElemSizeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e8d3186745074..1e52355d67ee7 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -648,6 +648,8 @@ ?Clear@exception_list@_V1@sycl@@AEAAXXZ ?DirSep@OSUtil@detail@_V1@sycl@@2QEBDEB ?DisableRangeRounding@handler@_V1@sycl@@AEAA_NXZ +?EventMode@SubmissionInfo@detail@_V1@sycl@@QEAAAEAW4event_mode_enum@experimental@oneapi@ext@34@XZ +?EventMode@SubmissionInfo@detail@_V1@sycl@@QEBAAEBW4event_mode_enum@experimental@oneapi@ext@34@XZ ?GDBMethodsAnchor@SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GDBMethodsAnchor@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GetRangeRoundingSettings@handler@_V1@sycl@@AEAAXAEA_K00@Z diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index c6a7bdd891344..12c8ea721fb15 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -111,6 +111,7 @@ // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/properties/property_utils.hpp +// CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: handler.hpp // CHECK-NEXT: detail/cl.h diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index e2fca2af47b73..5fa5d04d39317 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -18,6 +18,7 @@ add_sycl_unittest(ExtensionsTests OBJECT WorkGroupMemoryBackendArgument.cpp GetLastEvent.cpp BFloat16.cpp + EventMode.cpp ) add_subdirectory(CommandGraph) diff --git a/sycl/unittests/Extensions/EventMode.cpp b/sycl/unittests/Extensions/EventMode.cpp new file mode 100644 index 0000000000000..f11d1a527c832 --- /dev/null +++ b/sycl/unittests/Extensions/EventMode.cpp @@ -0,0 +1,129 @@ +//==------ ProfilingTag.cpp --- sycl_ext_oneapi_event_mode unit tests ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include + +#include + +inline thread_local size_t counter_urEnqueueEventsWaitWithBarrierExt = 0; +inline ur_result_t after_urEnqueueEventsWaitWithBarrierExt(void *pParams) { + auto Params = + *static_cast(pParams); + + assert(*Params.ppProperties != nullptr); + assert((*Params.ppProperties)->flags & + UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); + + ++counter_urEnqueueEventsWaitWithBarrierExt; + return UR_RESULT_SUCCESS; +} + +class EventModeTest : public ::testing::Test { +public: + EventModeTest() : Mock{} {} + +protected: + void SetUp() override { counter_urEnqueueEventsWaitWithBarrierExt = 0; } + +protected: + sycl::unittest::UrMock<> Mock; +}; + +TEST_F(EventModeTest, EventModeFullBarrier) { + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrierExt", + &after_urEnqueueEventsWaitWithBarrierExt); + + sycl::queue Q; + + sycl::ext::oneapi::experimental::properties Props{ + sycl::ext::oneapi::experimental::event_mode{ + sycl::ext::oneapi::experimental::event_mode_enum::low_power}}; + + sycl::ext::oneapi::experimental::submit_with_event( + Q, Props, + [&](sycl::handler &CGH) { + sycl::ext::oneapi::experimental::barrier(CGH); + }) + .wait(); + + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrierExt); +} + +TEST_F(EventModeTest, EventModePartialBarrier) { + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrierExt", + &after_urEnqueueEventsWaitWithBarrierExt); + + sycl::queue Q; + + sycl::ext::oneapi::experimental::properties Props{ + sycl::ext::oneapi::experimental::event_mode{ + sycl::ext::oneapi::experimental::event_mode_enum::low_power}}; + + sycl::event E = Q.prefetch(reinterpret_cast(0x1), 1); + + sycl::ext::oneapi::experimental::submit_with_event( + Q, Props, + [&](sycl::handler &CGH) { + sycl::ext::oneapi::experimental::partial_barrier(CGH, {E}); + }) + .wait(); + + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrierExt); +} + +TEST_F(EventModeTest, EventModeInOrderFullBarrier) { + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrierExt", + &after_urEnqueueEventsWaitWithBarrierExt); + + sycl::queue Q{sycl::property::queue::in_order{}}; + + sycl::ext::oneapi::experimental::properties Props{ + sycl::ext::oneapi::experimental::event_mode{ + sycl::ext::oneapi::experimental::event_mode_enum::low_power}}; + + Q.prefetch(reinterpret_cast(0x1), 1); + + sycl::ext::oneapi::experimental::submit_with_event( + Q, Props, + [&](sycl::handler &CGH) { + sycl::ext::oneapi::experimental::barrier(CGH); + }) + .wait(); + + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrierExt); +} + +TEST_F(EventModeTest, EventModeInOrderPartialBarrier) { + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrierExt", + &after_urEnqueueEventsWaitWithBarrierExt); + + sycl::queue Q{sycl::property::queue::in_order{}}; + + sycl::ext::oneapi::experimental::properties Props{ + sycl::ext::oneapi::experimental::event_mode{ + sycl::ext::oneapi::experimental::event_mode_enum::low_power}}; + + Q.prefetch(reinterpret_cast(0x1), 1); + + sycl::event E = Q.prefetch(reinterpret_cast(0x1), 1); + + sycl::ext::oneapi::experimental::submit_with_event( + Q, Props, + [&](sycl::handler &CGH) { + sycl::ext::oneapi::experimental::partial_barrier(CGH, {E}); + }) + .wait(); + + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrierExt); +} diff --git a/sycl/unittests/Extensions/ProfilingTag.cpp b/sycl/unittests/Extensions/ProfilingTag.cpp index 394fba8497103..924eaf6bd66ff 100644 --- a/sycl/unittests/Extensions/ProfilingTag.cpp +++ b/sycl/unittests/Extensions/ProfilingTag.cpp @@ -38,9 +38,9 @@ inline ur_result_t after_urEventGetProfilingInfo(void *pParams) { return UR_RESULT_SUCCESS; } -thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; -inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *) { - ++counter_urEnqueueEventsWaitWithBarrier; +inline thread_local size_t counter_urEnqueueEventsWaitWithBarrierExt = 0; +inline ur_result_t after_urEnqueueEventsWaitWithBarrierExt(void *) { + ++counter_urEnqueueEventsWaitWithBarrierExt; return UR_RESULT_SUCCESS; } @@ -51,7 +51,7 @@ class ProfilingTagTest : public ::testing::Test { protected: void SetUp() override { counter_urEnqueueTimestampRecordingExp = 0; - counter_urEnqueueEventsWaitWithBarrier = 0; + counter_urEnqueueEventsWaitWithBarrierExt = 0; LatestProfilingQuery = std::nullopt; } @@ -67,7 +67,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) { mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", &after_urEventGetProfilingInfo); mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrier", + &after_urEnqueueEventsWaitWithBarrierExt); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, sycl::default_selector_v}; @@ -79,7 +80,7 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) { ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); // TODO: We expect two barriers for now, while marker events leak. Adjust when // addressed. - ASSERT_EQ(size_t{2}, counter_urEnqueueEventsWaitWithBarrier); + ASSERT_EQ(size_t{2}, counter_urEnqueueEventsWaitWithBarrierExt); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); @@ -98,7 +99,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedInOrderQueue) { mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", &after_urEventGetProfilingInfo); mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrier", + &after_urEnqueueEventsWaitWithBarrierExt); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{ @@ -109,7 +111,7 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedInOrderQueue) { sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); - ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier); + ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrierExt); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); @@ -156,7 +158,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingInOrderQueue) { mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", &after_urEventGetProfilingInfo); mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrier", + &after_urEnqueueEventsWaitWithBarrierExt); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, @@ -169,7 +172,7 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingInOrderQueue) { sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); - ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier); + ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrierExt); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); @@ -208,7 +211,8 @@ TEST_F(ProfilingTagTest, ProfilingTagFallbackProfilingQueue) { mock::getCallbacks().set_after_callback( "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &after_urEnqueueEventsWaitWithBarrierExt); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, @@ -220,5 +224,5 @@ TEST_F(ProfilingTagTest, ProfilingTagFallbackProfilingQueue) { sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); ASSERT_EQ(size_t{0}, counter_urEnqueueTimestampRecordingExp); - ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrier); + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrierExt); } diff --git a/sycl/unittests/queue/Barrier.cpp b/sycl/unittests/queue/Barrier.cpp index a1e2160ba606f..b02eec23a7917 100644 --- a/sycl/unittests/queue/Barrier.cpp +++ b/sycl/unittests/queue/Barrier.cpp @@ -13,7 +13,7 @@ static unsigned NumOfEventsWaitWithBarrierCalls = 0; -static ur_result_t redefined_urEnqueueEventsWaitWithBarrier(void *) { +static ur_result_t redefined_urEnqueueEventsWaitWithBarrierExt(void *) { NumOfEventsWaitWithBarrierCalls++; return UR_RESULT_SUCCESS; @@ -22,8 +22,8 @@ static ur_result_t redefined_urEnqueueEventsWaitWithBarrier(void *) { TEST(Queue, HandlerBarrier) { sycl::unittest::UrMock<> Mock; mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", - &redefined_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefined_urEnqueueEventsWaitWithBarrierExt); NumOfEventsWaitWithBarrierCalls = 0; sycl::queue Q; @@ -41,8 +41,8 @@ TEST(Queue, HandlerBarrier) { TEST(Queue, ExtOneAPISubmitBarrier) { sycl::unittest::UrMock<> Mock; mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", - &redefined_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefined_urEnqueueEventsWaitWithBarrierExt); NumOfEventsWaitWithBarrierCalls = 0; sycl::queue Q; @@ -60,8 +60,8 @@ TEST(Queue, ExtOneAPISubmitBarrier) { TEST(Queue, HandlerBarrierWithWaitList) { sycl::unittest::UrMock<> Mock; mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", - &redefined_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefined_urEnqueueEventsWaitWithBarrierExt); NumOfEventsWaitWithBarrierCalls = 0; sycl::queue Q1; @@ -81,8 +81,8 @@ TEST(Queue, HandlerBarrierWithWaitList) { TEST(Queue, ExtOneAPISubmitBarrierWithWaitList) { sycl::unittest::UrMock<> Mock; mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", - &redefined_urEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefined_urEnqueueEventsWaitWithBarrierExt); NumOfEventsWaitWithBarrierCalls = 0; sycl::queue Q1; diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index 9ebd256a2f587..138648986519b 100644 --- a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -15,9 +15,9 @@ using namespace sycl; -ur_result_t redefineEnqueueEventsWaitWithBarrier(void *pParams) { +ur_result_t redefineEnqueueEventsWaitWithBarrierExt(void *pParams) { auto params = - *static_cast(pParams); + *static_cast(pParams); for (uint32_t i = 0; i != *params.pnumEventsInWaitList; ++i) EXPECT_NE((*params.pphEventWaitList)[i], nullptr); @@ -51,7 +51,8 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", &redefineEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefineEnqueueEventsWaitWithBarrierExt); queue Queue{Plt.get_devices()[0]}; sycl::detail::QueueImplPtr QueueImpl = detail::getSyclObjImpl(Queue); @@ -77,7 +78,8 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { for (auto &Arg : InputEventWaitLists) { std::unique_ptr CommandGroup(new detail::CGBarrier( - std::move(Arg), detail::CG::StorageInitHelper({}, {}, {}, {}, {}), + std::move(Arg), ext::oneapi::experimental::event_mode_enum::none, + detail::CG::StorageInitHelper({}, {}, {}, {}, {}), detail::CGType::BarrierWaitlist, {})); MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl, /*EventNeeded=*/true); diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index d99ce5f6e0f3f..9366d63838d08 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -303,9 +303,9 @@ ur_result_t redefinedextUSMEnqueueMemcpy(void *pParams) { return UR_RESULT_SUCCESS; } -ur_result_t redefinedEnqueueEventsWaitWithBarrier(void *pParams) { +ur_result_t redefinedEnqueueEventsWaitWithBarrierExt(void *pParams) { auto params = - *static_cast(pParams); + *static_cast(pParams); **params.pphEvent = mock::createDummyHandle(); for (auto i = 0u; i < *params.pnumEventsInWaitList; i++) { EventsInWaitList.push_back((*params.pphEventWaitList)[i]); @@ -362,7 +362,8 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { TEST_F(DependsOnTests, BarrierWithWaitList) { mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", &redefinedEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefinedEnqueueEventsWaitWithBarrierExt); sycl::queue Queue = detail::createSyclObjFromImpl(QueueDevImpl); auto HostTaskEvent = diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 8a26849fc6255..2acd593d14bf3 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -87,9 +87,9 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { bool BarrierCalled = false; ur_event_handle_t ExpectedEvent = nullptr; -ur_result_t redefinedEnqueueEventsWaitWithBarrier(void *pParams) { +ur_result_t redefinedEnqueueEventsWaitWithBarrierExt(void *pParams) { auto params = - *static_cast(pParams); + *static_cast(pParams); EXPECT_EQ(*params.pnumEventsInWaitList, 1u); EXPECT_EQ(ExpectedEvent, **params.pphEventWaitList); BarrierCalled = true; @@ -107,7 +107,8 @@ TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) { sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", &redefinedEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefinedEnqueueEventsWaitWithBarrierExt); BarrierCalled = false; context Ctx{Plt.get_devices()[0]}; @@ -197,7 +198,8 @@ TEST_F(SchedulerTest, BypassSchedulerWithBarrier) { sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrier", &redefinedEnqueueEventsWaitWithBarrier); + "urEnqueueEventsWaitWithBarrierExt", + &redefinedEnqueueEventsWaitWithBarrierExt); BarrierCalled = false; context Ctx{Plt};