diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
index 3dc28532b2372..718c5850b3f9a 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
@@ -259,14 +259,15 @@ template <int Dimensions, typename KernelType, typename... ReductionsT>
 void nd_launch(queue Q, nd_range<Dimensions> Range,
                const KernelType &KernelObj, ReductionsT &&...Reductions) {
-#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  // TODO The handler-less path does not support reductions yet.
-  if constexpr (sizeof...(ReductionsT) == 0) {
+  // TODO The handler-less path does not support reductions and kernel
+  // function properties yet.
+  if constexpr (sizeof...(ReductionsT) == 0 &&
+                !(ext::oneapi::experimental::detail::
+                      HasKernelPropertiesGetMethod<
+                          const KernelType &>::value)) {
     detail::submit_kernel_direct(std::move(Q), empty_properties_t{}, Range,
                                  KernelObj);
-  } else
-#endif
-  {
+  } else {
     submit(std::move(Q), [&](handler &CGH) {
       nd_launch(CGH, Range, KernelObj,
                 std::forward<ReductionsT>(Reductions)...);
@@ -292,23 +293,10 @@ template <int Dimensions, typename Properties, typename KernelType,
           typename... ReductionsT>
 void nd_launch(queue Q,
                launch_config<nd_range<Dimensions>, Properties> Config,
                const KernelType &KernelObj, ReductionsT &&...Reductions) {
-#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  // TODO The handler-less path does not support reductions yet.
-  if constexpr (sizeof...(ReductionsT) == 0) {
-    ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
-                                                          Properties>
-        ConfigAccess(Config);
-    detail::submit_kernel_direct(
-        std::move(Q), ConfigAccess.getProperties(), ConfigAccess.getRange(),
-        KernelObj);
-  } else
-#endif
-  {
-    submit(std::move(Q), [&](handler &CGH) {
-      nd_launch(CGH, Config, KernelObj,
-                std::forward<ReductionsT>(Reductions)...);
-    });
-  }
+  submit(std::move(Q), [&](handler &CGH) {
+    nd_launch(CGH, Config, KernelObj,
+              std::forward<ReductionsT>(Reductions)...);
+  });
 }
 
 template <int Dimensions, typename... ArgsT>
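The HasKernelPropertiesGetMethod gate above targets kernel functors that report compile-time properties through the get(properties_tag) hook of sycl_ext_oneapi_kernel_properties; such kernels keep taking the handler-based path. A minimal sketch of the two kinds of kernels, assuming that extension's interface (the work_group_size property and the kernel bodies are arbitrary choices):

    #include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
    #include <sycl/sycl.hpp>
    namespace syclex = sycl::ext::oneapi::experimental;

    struct KernelWithProps {
      // The presence of this member is what the trait detects.
      auto get(syclex::properties_tag) const {
        return syclex::properties{syclex::work_group_size<32>};
      }
      void operator()(sycl::nd_item<1>) const {}
    };

    struct PlainKernel {
      void operator()(sycl::nd_item<1>) const {}
    };

    int main() {
      sycl::queue q;
      // Carries kernel properties: routed through the handler-based branch.
      syclex::nd_launch(q, sycl::nd_range<1>{32, 32}, KernelWithProps{});
      // No properties and no reductions: eligible for the handler-less branch.
      syclex::nd_launch(q, sycl::nd_range<1>{32, 32}, PlainKernel{});
      q.wait();
    }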
diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp
index 31464ba588dfc..e1afe00672f21 100644
--- a/sycl/include/sycl/khr/free_function_commands.hpp
+++ b/sycl/include/sycl/khr/free_function_commands.hpp
@@ -157,45 +157,54 @@ template <typename KernelType,
           typename = std::enable_if_t<
               std::is_invocable_v<KernelType, nd_item<1>>>>
 void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
                     const sycl::detail::code_location &codeLoc =
                         sycl::detail::code_location::current()) {
-#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  detail::submit_kernel_direct(
-      q, ext::oneapi::experimental::empty_properties_t{}, nd_range<1>(r, size),
-      std::forward<KernelType>(k));
-#else
-  submit(
-      q, [&](handler &h) { launch_grouped(h, r, size, k); },
-      codeLoc);
-#endif
+  // TODO The handler-less path does not support kernel function properties
+  // yet.
+  if constexpr (!(ext::oneapi::experimental::detail::
+                      HasKernelPropertiesGetMethod<
+                          const KernelType &>::value)) {
+    detail::submit_kernel_direct(
+        q, ext::oneapi::experimental::empty_properties_t{},
+        nd_range<1>(r, size), std::forward<KernelType>(k));
+  } else {
+    submit(
+        q, [&](handler &h) { launch_grouped(h, r, size, k); },
+        codeLoc);
+  }
 }
 
 template <typename KernelType,
           typename = std::enable_if_t<
              std::is_invocable_v<KernelType, nd_item<2>>>>
 void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
                     const sycl::detail::code_location &codeLoc =
                         sycl::detail::code_location::current()) {
-#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  detail::submit_kernel_direct(
-      q, ext::oneapi::experimental::empty_properties_t{}, nd_range<2>(r, size),
-      std::forward<KernelType>(k));
-#else
-  submit(
-      q, [&](handler &h) { launch_grouped(h, r, size, k); },
-      codeLoc);
-#endif
+  // TODO The handler-less path does not support kernel function properties
+  // yet.
+  if constexpr (!(ext::oneapi::experimental::detail::
+                      HasKernelPropertiesGetMethod<
+                          const KernelType &>::value)) {
+    detail::submit_kernel_direct(
+        q, ext::oneapi::experimental::empty_properties_t{},
+        nd_range<2>(r, size), std::forward<KernelType>(k));
+  } else {
+    submit(
+        q, [&](handler &h) { launch_grouped(h, r, size, k); },
+        codeLoc);
+  }
 }
 
 template <typename KernelType,
           typename = std::enable_if_t<
              std::is_invocable_v<KernelType, nd_item<3>>>>
 void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
                     const sycl::detail::code_location &codeLoc =
                         sycl::detail::code_location::current()) {
-#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  detail::submit_kernel_direct(
-      q, ext::oneapi::experimental::empty_properties_t{}, nd_range<3>(r, size),
-      std::forward<KernelType>(k));
-#else
-  submit(
-      q, [&](handler &h) { launch_grouped(h, r, size, k); },
-      codeLoc);
-#endif
+  // TODO The handler-less path does not support kernel function properties
+  // yet.
+  if constexpr (!(ext::oneapi::experimental::detail::
+                      HasKernelPropertiesGetMethod<
+                          const KernelType &>::value)) {
+    detail::submit_kernel_direct(
+        q, ext::oneapi::experimental::empty_properties_t{},
+        nd_range<3>(r, size), std::forward<KernelType>(k));
+  } else {
+    submit(
+        q, [&](handler &h) { launch_grouped(h, r, size, k); },
+        codeLoc);
+  }
 }
 
 template <typename... Args>
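A usage sketch for the launch_grouped shortcut touched above: the global range r is divided into work-groups of size, matching the nd_range<1>(r, size) mapping in the implementation. This assumes <sycl/sycl.hpp> pulls in the khr header (include sycl/khr/free_function_commands.hpp explicitly otherwise) and the nd_item kernel signature implied by that mapping:

    #include <sycl/sycl.hpp>

    int main() {
      sycl::queue q;
      int *data = sycl::malloc_shared<int>(64, q);
      // 64 work-items in groups of 32; a plain lambda like this is eligible
      // for the handler-less path introduced by this patch.
      sycl::khr::launch_grouped(
          q, sycl::range<1>{64}, sycl::range<1>{32},
          [=](sycl::nd_item<1> it) { data[it.get_global_id(0)] = 1; });
      q.wait();
      sycl::free(data, q);
    }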
diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp
index 69911bec229fc..6e3f6b00f67ef 100644
--- a/sycl/include/sycl/queue.hpp
+++ b/sycl/include/sycl/queue.hpp
@@ -3275,15 +3275,19 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
   parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
     constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
     detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
-#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-    // TODO The handler-less path does not support reductions yet.
-    if constexpr (sizeof...(RestT) == 1) {
+
+    using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;
+
+    // TODO The handler-less path does not support reductions and kernel
+    // function properties yet.
+    if constexpr (sizeof...(RestT) == 1 &&
+                  !(ext::oneapi::experimental::detail::
+                        HasKernelPropertiesGetMethod<
+                            const KernelType &>::value)) {
       return detail::submit_kernel_direct(
           *this, ext::oneapi::experimental::empty_properties_t{}, Range,
           Rest...);
-    } else
-#endif
-    {
+    } else {
       return submit(
           [&](handler &CGH) {
             CGH.template parallel_for<KernelName>(Range, Rest...);
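The dispatch above hinges on two compile-time steps: recovering the kernel type as the first element of the RestT... pack, then probing it for a properties get() method. A self-contained re-creation of that technique (has_properties_get approximates HasKernelPropertiesGetMethod, whose real definition lives elsewhere in the tree):

    #include <cstdio>
    #include <tuple>
    #include <type_traits>
    #include <utility>

    struct properties_tag {};

    // Detection idiom: true when T has a member get(properties_tag).
    template <typename T, typename = void>
    struct has_properties_get : std::false_type {};
    template <typename T>
    struct has_properties_get<
        T, std::void_t<decltype(std::declval<T>().get(properties_tag{}))>>
        : std::true_type {};

    template <typename... RestT> void dispatch(RestT &&...) {
      using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;
      if constexpr (sizeof...(RestT) == 1 &&
                    !has_properties_get<const KernelType &>::value)
        std::puts("handler-less direct submit");
      else
        std::puts("handler-based submit");
    }

    struct WithProps {
      int get(properties_tag) const { return 0; }
    };

    int main() {
      dispatch([] {});       // direct path
      dispatch(WithProps{}); // handler path: properties detected
      dispatch([] {}, 1, 2); // handler path: trailing args model reductions
    }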
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index 8243896cf76d1..b8c2a2866a625 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -420,6 +420,94 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
   return EventImpl;
 }
 
+EventImplPtr queue_impl::submit_kernel_scheduler_bypass(
+    KernelData &KData, std::vector<detail::EventImplPtr> &DepEvents,
+    bool EventNeeded, detail::kernel_impl *KernelImplPtr,
+    detail::kernel_bundle_impl *KernelBundleImpPtr,
+    const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
+  std::vector<ur_event_handle_t> RawEvents;
+
+  // TODO Checking the size of the events vector and avoiding the call when it
+  // is empty is more efficient here at this point
+  if (DepEvents.size() > 0) {
+    RawEvents = detail::Command::getUrEvents(DepEvents, this, false);
+  }
+
+  bool DiscardEvent = !EventNeeded && supportsDiscardingPiEvents();
+  if (DiscardEvent) {
+    // The kernel only uses assert if it is a non-interop one
+    bool KernelUsesAssert =
+        !(KernelImplPtr && KernelImplPtr->isInterop()) && KData.usesAssert();
+    DiscardEvent = !KernelUsesAssert;
+  }
+
+  std::shared_ptr<detail::event_impl> ResultEvent =
+      DiscardEvent ? nullptr : detail::event_impl::create_device_event(*this);
+
+  auto EnqueueKernel = [&]() {
+#ifdef XPTI_ENABLE_INSTRUMENTATION
+    xpti_td *CmdTraceEvent = nullptr;
+    uint64_t InstanceID = 0;
+    auto StreamID = detail::getActiveXPTIStreamID();
+    // Only enable instrumentation if there are subscribers to the SYCL
+    // stream
+    const bool xptiEnabled = xptiCheckTraceEnabled(StreamID);
+    if (xptiEnabled) {
+      std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
+          StreamID, KernelImplPtr, CodeLoc, IsTopCodeLoc,
+          *KData.getDeviceKernelInfoPtr(), this, KData.getNDRDesc(),
+          KernelBundleImpPtr, KData.getArgs());
+      detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
+                                         xpti::trace_task_begin, nullptr);
+    }
+#endif
+    const detail::RTDeviceBinaryImage *BinImage = nullptr;
+    if (detail::SYCLConfig<detail::SYCL_JIT_AMDGCN_PTX_KERNELS>::get()) {
+      BinImage = detail::retrieveKernelBinary(*this, KData.getKernelName());
+      assert(BinImage && "Failed to obtain a binary image.");
+    }
+    enqueueImpKernel(*this, KData.getNDRDesc(), KData.getArgs(),
+                     KernelBundleImpPtr, KernelImplPtr,
+                     *KData.getDeviceKernelInfoPtr(), RawEvents,
+                     ResultEvent.get(), nullptr, KData.getKernelCacheConfig(),
+                     KData.isCooperative(), KData.usesClusterLaunch(),
+                     KData.getKernelWorkGroupMemorySize(), BinImage,
+                     KData.getKernelFuncPtr());
+#ifdef XPTI_ENABLE_INSTRUMENTATION
+    if (xptiEnabled) {
+      // Emit the signal only when the event is created
+      if (!DiscardEvent) {
+        detail::emitInstrumentationGeneral(
+            StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
+            static_cast<const void *>(ResultEvent->getHandle()));
+      }
+      detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
+                                         xpti::trace_task_end, nullptr);
+    }
+#endif
+  };
+
+  if (DiscardEvent) {
+    EnqueueKernel();
+  } else {
+    ResultEvent->setWorkerQueue(weak_from_this());
+    ResultEvent->setStateIncomplete();
+    ResultEvent->setSubmissionTime();
+
+    EnqueueKernel();
+    ResultEvent->setEnqueued();
+    // Connect the returned event with its dependent events
+    if (!isInOrder()) {
+      // DepEvents is not used anymore, so it can be moved.
+      ResultEvent->getPreparedDepsEvents() = std::move(DepEvents);
+      // ResultEvent is local to the current thread, so no need to lock.
+      ResultEvent->cleanDepEventsThroughOneLevelUnlocked();
+    }
+  }
+
+  return ResultEvent;
+}
+
 EventImplPtr queue_impl::submit_command_to_graph(
     ext::oneapi::experimental::detail::graph_impl &GraphImpl,
     std::unique_ptr<detail::CG> CommandGroup, sycl::detail::CGType CGType,
@@ -475,26 +563,31 @@ EventImplPtr queue_impl::submit_command_to_graph(
   return EventImpl;
 }
 
-detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
+EventImplPtr queue_impl::submit_kernel_direct_impl(
     const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel,
     detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent,
     const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
 
   KernelData KData;
 
-  std::shared_ptr<detail::HostKernelBase> HostKernelPtr =
-      HostKernel.takeOrCopyOwnership();
-
   KData.setDeviceKernelInfoPtr(DeviceKernelInfo);
-  KData.setKernelFunc(HostKernelPtr->getPtr());
+  KData.setKernelFunc(HostKernel.getPtr());
   KData.setNDRDesc(NDRDesc);
 
-  auto SubmitKernelFunc =
-      [&](detail::CG::StorageInitHelper &CGData) -> EventImplPtr {
+  auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData,
+                              bool SchedulerBypass) -> EventImplPtr {
+    if (SchedulerBypass) {
+      return submit_kernel_scheduler_bypass(KData, CGData.MEvents,
+                                            CallerNeedsEvent, nullptr, nullptr,
+                                            CodeLoc, IsTopCodeLoc);
+    }
     std::unique_ptr<detail::CG> CommandGroup;
     std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
    std::vector<std::shared_ptr<const void>> AuxiliaryResources;
 
+    std::shared_ptr<detail::HostKernelBase> HostKernelPtr =
+        HostKernel.takeOrCopyOwnership();
+
     KData.extractArgsAndReqsFromLambda();
 
     CommandGroup.reset(new detail::CGExecKernel(
@@ -504,10 +597,8 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
         std::move(CGData), std::move(KData).getArgs(),
         *KData.getDeviceKernelInfoPtr(), std::move(StreamStorage),
         std::move(AuxiliaryResources), detail::CGType::Kernel,
-        UR_KERNEL_CACHE_CONFIG_DEFAULT,
-        false, // KernelIsCooperative
-        false, // KernelUsesClusterLaunch
-        0,     // KernelWorkGroupMemorySize
+        KData.getKernelCacheConfig(), KData.isCooperative(),
+        KData.usesClusterLaunch(), KData.getKernelWorkGroupMemorySize(),
         CodeLoc));
 
     CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
@@ -530,9 +621,6 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
   detail::CG::StorageInitHelper CGData;
   std::unique_lock<std::mutex> Lock(MMutex);
 
-  // Graphs are not supported yet for the no-handler path
-  assert(!hasCommandGraph());
-
   // Set the No Last Event Mode to false, since the no-handler path
   // does not support it yet.
   MNoLastEventMode.store(false, std::memory_order_relaxed);
@@ -567,11 +655,21 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
     }
   }
 
-  EventImplPtr EventImpl = SubmitCommandFunc(CGData);
+  bool SchedulerBypass =
+      (CGData.MEvents.size() > 0
+           ? detail::Scheduler::areEventsSafeForSchedulerBypass(
+                 CGData.MEvents, getContextImpl())
+           : true) &&
+      !hasCommandGraph();
 
-  // Sync with the last event for in order queue
-  if (isInOrder() && !EventImpl->isDiscarded()) {
-    LastEvent = EventImpl;
+  EventImplPtr EventImpl = SubmitCommandFunc(CGData, SchedulerBypass);
+
+  // Sync with the last event for an in-order queue. For the scheduler-bypass
+  // flow, ordering is handled by the layers below the SYCL runtime, but for
+  // the scheduler-based flow it needs to be done here, as the scheduler
+  // handles host task submissions.
+  if (isInOrder()) {
+    LastEvent = SchedulerBypass ? nullptr : EventImpl;
   }
 
   // Barrier and un-enqueued commands synchronization for out or order queue
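The bypass condition computed in submit_direct distills to: all dependencies (if any) must be safe to hand straight to the backend, and the queue must not be recording to a command graph. A sketch with stand-in types (Event and SafeForBypass are illustrative; the real check is Scheduler::areEventsSafeForSchedulerBypass):

    #include <vector>

    struct Event {
      bool SafeForBypass; // e.g. a plain device event from the same context
    };

    bool areEventsSafeForBypass(const std::vector<Event> &Deps) {
      for (const Event &E : Deps)
        if (!E.SafeForBypass)
          return false;
      return true;
    }

    bool shouldBypassScheduler(const std::vector<Event> &Deps,
                               bool QueueRecordsToGraph) {
      bool DepsSafe = Deps.empty() || areEventsSafeForBypass(Deps);
      return DepsSafe && !QueueRecordsToGraph;
    }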
diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index b3cfb474e7926..49da7aee8c448 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -386,6 +386,24 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
     submit_impl(CGF, /*CallerNeedsEvent=*/false, Loc, IsTopCodeLoc, SubmitInfo);
   }
 
+  /// Submits a kernel using the scheduler-bypass fast path.
+  ///
+  /// \param KData is an object storing data related to the kernel.
+  /// \param DepEvents is a list of event dependencies.
+  /// \param EventNeeded should be true if the resulting event is needed.
+  /// \param KernelImplPtr is the kernel implementation to be used when the
+  /// kernel is defined as a kernel object, nullptr otherwise.
+  /// \param KernelBundleImpPtr is the kernel bundle to be used when one is
+  /// defined, nullptr otherwise.
+  /// \param CodeLoc is the code location of the submit call.
+  /// \param IsTopCodeLoc is used to determine if the object is in a local
+  /// scope or in the top level scope.
+  ///
+  /// \return a SYCL event representing the submitted command, or nullptr.
+  EventImplPtr submit_kernel_scheduler_bypass(
+      KernelData &KData, std::vector<detail::EventImplPtr> &DepEvents,
+      bool EventNeeded, detail::kernel_impl *KernelImplPtr,
+      detail::kernel_bundle_impl *KernelBundleImpPtr,
+      const detail::code_location &CodeLoc, bool IsTopCodeLoc);
+
   /// Performs a blocking wait for the completion of all enqueued tasks in the
   /// queue.
   ///
@@ -908,14 +926,14 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
   /// scope or in the top level scope.
   ///
   /// \return a SYCL event representing submitted command group or nullptr.
-  detail::EventImplPtr submit_kernel_direct_impl(
+  EventImplPtr submit_kernel_direct_impl(
       const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel,
       detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent,
       const detail::code_location &CodeLoc, bool IsTopCodeLoc);
 
   template <typename SubmitCommandFuncType>
-  detail::EventImplPtr submit_direct(bool CallerNeedsEvent,
-                                     SubmitCommandFuncType &SubmitCommandFunc);
+  EventImplPtr submit_direct(bool CallerNeedsEvent,
+                             SubmitCommandFuncType &SubmitCommandFunc);
 
   /// Helper function for submitting a memory operation with a handler.
   /// \param DepEvents is a vector of dependencies of the operation.
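Why queue_impl derives from std::enable_shared_from_this (visible in the hunk context above): the bypass path hands each event a non-owning back-reference to its worker queue via weak_from_this(), so the event cannot keep the queue alive and no reference cycle forms. The pattern in isolation:

    #include <cassert>
    #include <memory>

    struct QueueImpl : std::enable_shared_from_this<QueueImpl> {
      std::weak_ptr<QueueImpl> workerRef() { return weak_from_this(); }
    };

    int main() {
      auto Q = std::make_shared<QueueImpl>();
      std::weak_ptr<QueueImpl> W = Q->workerRef(); // non-owning
      assert(!W.expired());
      Q.reset(); // the outstanding reference does not pin the queue
      assert(W.expired());
    }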
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index 47a97a812eefa..26477c99be62c 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -638,97 +638,11 @@ event handler::finalize() {
       // the graph is not changed, then this faster path is used to submit
      // kernel bypassing scheduler and avoiding CommandGroup, Command objects
      // creation.
-      std::vector<ur_event_handle_t> RawEvents;
-      // TODO checking the size of the events vector and avoiding the call is
-      // more efficient here at this point
-      if (impl->CGData.MEvents.size() > 0) {
-        RawEvents = detail::Command::getUrEvents(
-            impl->CGData.MEvents, impl->get_queue_or_null(), false);
-      }
-
-      bool DiscardEvent =
-          !impl->MEventNeeded && impl->get_queue().supportsDiscardingPiEvents();
-      if (DiscardEvent) {
-        // Kernel only uses assert if it's non interop one
-        bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) &&
-                                impl->MKernelData.usesAssert();
-        DiscardEvent = !KernelUsesAssert;
-      }
-
-      std::shared_ptr<detail::event_impl> ResultEvent =
-          DiscardEvent
-              ? nullptr
-              : detail::event_impl::create_device_event(impl->get_queue());
-
-      auto EnqueueKernel = [&]() {
-#ifdef XPTI_ENABLE_INSTRUMENTATION
-        xpti_td *CmdTraceEvent = nullptr;
-        uint64_t InstanceID = 0;
-        auto StreamID = detail::getActiveXPTIStreamID();
-        // Only enable instrumentation if there are subscribes to the SYCL
-        // stream
-        const bool xptiEnabled = xptiCheckTraceEnabled(StreamID);
-        if (xptiEnabled) {
-          std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
-              StreamID, MKernel.get(), MCodeLoc, impl->MIsTopCodeLoc,
-              *impl->MKernelData.getDeviceKernelInfoPtr(),
-              impl->get_queue_or_null(), impl->MKernelData.getNDRDesc(),
-              KernelBundleImpPtr, impl->MKernelData.getArgs());
-          detail::emitInstrumentationGeneral(StreamID, InstanceID,
-                                             CmdTraceEvent,
-                                             xpti::trace_task_begin, nullptr);
-        }
-#endif
-        const detail::RTDeviceBinaryImage *BinImage = nullptr;
-        if (detail::SYCLConfig<detail::SYCL_JIT_AMDGCN_PTX_KERNELS>::get()) {
-          BinImage = detail::retrieveKernelBinary(impl->get_queue(),
-                                                  impl->getKernelName());
-          assert(BinImage && "Failed to obtain a binary image.");
-        }
-        enqueueImpKernel(impl->get_queue(), impl->MKernelData.getNDRDesc(),
-                         impl->MKernelData.getArgs(), KernelBundleImpPtr,
-                         MKernel.get(),
-                         *impl->MKernelData.getDeviceKernelInfoPtr(), RawEvents,
-                         ResultEvent.get(), nullptr,
-                         impl->MKernelData.getKernelCacheConfig(),
-                         impl->MKernelData.isCooperative(),
-                         impl->MKernelData.usesClusterLaunch(),
-                         impl->MKernelData.getKernelWorkGroupMemorySize(),
-                         BinImage, impl->MKernelData.getKernelFuncPtr());
-#ifdef XPTI_ENABLE_INSTRUMENTATION
-        if (xptiEnabled) {
-          // Emit signal only when event is created
-          if (!DiscardEvent) {
-            detail::emitInstrumentationGeneral(
-                StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
-                static_cast<const void *>(ResultEvent->getHandle()));
-          }
-          detail::emitInstrumentationGeneral(StreamID, InstanceID,
-                                             CmdTraceEvent,
-                                             xpti::trace_task_end, nullptr);
-        }
-#endif
-      };
-
-      if (DiscardEvent) {
-        EnqueueKernel();
-      } else {
-        detail::queue_impl &Queue = impl->get_queue();
-        ResultEvent->setWorkerQueue(Queue.weak_from_this());
-        ResultEvent->setStateIncomplete();
-        ResultEvent->setSubmissionTime();
-
-        EnqueueKernel();
-        ResultEvent->setEnqueued();
-        // connect returned event with dependent events
-        if (!Queue.isInOrder()) {
-          // MEvents is not used anymore, so can move.
-          ResultEvent->getPreparedDepsEvents() =
-              std::move(impl->CGData.MEvents);
-          // ResultEvent is local for current thread, no need to lock.
-          ResultEvent->cleanDepEventsThroughOneLevelUnlocked();
-        }
-      }
+      detail::EventImplPtr ResultEvent =
+          impl->get_queue().submit_kernel_scheduler_bypass(
+              impl->MKernelData, impl->CGData.MEvents, impl->MEventNeeded,
+              MKernel.get(), KernelBundleImpPtr, MCodeLoc, impl->MIsTopCodeLoc);
 #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
       return ResultEvent;
 #else
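The moved block's event-discard rule, distilled into one function (parameter names are illustrative; the logic mirrors the DiscardEvent computation now living in submit_kernel_scheduler_bypass):

    #include <cassert>

    bool canDiscardEvent(bool EventNeeded, bool SupportsDiscard,
                         bool KernelIsInterop, bool KernelUsesAssert) {
      bool Discard = !EventNeeded && SupportsDiscard;
      // Assert reporting needs a real event, unless the kernel is interop.
      return Discard && (KernelIsInterop || !KernelUsesAssert);
    }

    int main() {
      assert(canDiscardEvent(false, true, false, false)); // plain kernel
      assert(!canDiscardEvent(true, true, false, false)); // caller wants event
      assert(!canDiscardEvent(false, true, false, true)); // assert needs event
      assert(canDiscardEvent(false, true, true, true));   // interop: no assert
    }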
diff --git a/sycl/test-e2e/Basic/test_num_kernel_copies.cpp b/sycl/test-e2e/Basic/test_num_kernel_copies.cpp
index 7c1781e873a39..98987c13c196e 100644
--- a/sycl/test-e2e/Basic/test_num_kernel_copies.cpp
+++ b/sycl/test-e2e/Basic/test_num_kernel_copies.cpp
@@ -29,7 +29,7 @@ int main(int argc, char **argv) {
   kernel<1> krn1;
   q.parallel_for(sycl::nd_range<1>{1, 1}, krn1);
 
-  assert(copy_count == 1);
+  assert(copy_count == 0);
   assert(move_count == 0);
 
   copy_count = 0;
diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp
index 360bdca27e73f..ca0753013c03b 100644
--- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp
+++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp
@@ -227,7 +227,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) {
   ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
 }
 
-#if __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
 TEST_F(FreeFunctionCommandsEventsTests,
        LaunchGroupedShortcutMoveKernelNoEvent) {
   mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",
@@ -235,16 +234,47 @@ TEST_F(FreeFunctionCommandsEventsTests,
   TestMoveFunctor::MoveCtorCalls = 0;
   TestMoveFunctor MoveOnly;
 
+  std::mutex CvMutex;
+  std::condition_variable Cv;
+  bool ready = false;
+
+  // This kernel submission takes the scheduler-bypass path, so the HostKernel
+  // shouldn't be constructed.
+  sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32},
+                            std::move(MoveOnly));
+
+  ASSERT_EQ(TestMoveFunctor::MoveCtorCalls, 0);
+  ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
+
+  // Another kernel submission is queued behind a blocked host task to force
+  // the scheduler-based submission. In this case, the HostKernel should be
+  // constructed.
+  Queue.submit([&](sycl::handler &CGH) {
+    CGH.host_task([&] {
+      std::unique_lock<std::mutex> lk(CvMutex);
+      Cv.wait(lk, [&ready] { return ready; });
+    });
+  });
+
   sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32},
                             std::move(MoveOnly));
+
+  {
+    std::unique_lock<std::mutex> lk(CvMutex);
+    ready = true;
+  }
+  Cv.notify_one();
+
+  Queue.wait();
+
   // Move ctor for TestMoveFunctor is called during move construction of
   // HostKernel. Copy ctor is called by InstantiateKernelOnHost, can't delete
   // it.
   ASSERT_EQ(TestMoveFunctor::MoveCtorCalls, 1);
-
-  ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
+  ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{2});
 }
-#endif
 
 TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) {
   mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",
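The gating trick used by the updated test, as a standalone sketch: an in-order queue makes the next submission depend on a deliberately blocked host task, which disqualifies it from the bypass and forces the scheduler path; the condition variable then releases the host task:

    #include <condition_variable>
    #include <mutex>
    #include <sycl/sycl.hpp>

    int main() {
      sycl::queue Q{sycl::property::queue::in_order{}};
      std::mutex M;
      std::condition_variable Cv;
      bool Ready = false;

      Q.submit([&](sycl::handler &CGH) {
        CGH.host_task([&] {
          std::unique_lock<std::mutex> Lk(M);
          Cv.wait(Lk, [&] { return Ready; });
        });
      });

      // Depends on an unfinished host task, so it takes the scheduler path.
      Q.parallel_for(sycl::nd_range<1>{32, 32}, [](sycl::nd_item<1>) {});

      {
        std::lock_guard<std::mutex> Lk(M);
        Ready = true;
      }
      Cv.notify_one();
      Q.wait();
    }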
diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp
index e0a5c9be50c15..c275f7d03cc9d 100644
--- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp
+++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp
@@ -26,53 +26,53 @@ inline ur_result_t after_urKernelGetInfo(void *pParams) {
 static thread_local size_t counter_urEnqueueKernelLaunch = 0;
 inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) {
   ++counter_urEnqueueKernelLaunch;
-// TODO The no-handler scheduler submission includes a fix for the event return,
-// where the event is returned by the scheduler on every submission. This fix
-// is not yet applied to the handler-based path.
-#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
-  EXPECT_EQ(*params.pphEvent, nullptr);
-#endif
+  // TODO The no-handler scheduler submission includes a fix for the event
+  // return, where the event is returned by the scheduler on every submission.
+  // This fix is not yet applied to the handler-based path.
+  // #ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
+  //   auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
+  //   EXPECT_EQ(*params.pphEvent, nullptr);
+  // #endif
   return UR_RESULT_SUCCESS;
 }
 
 static thread_local size_t counter_urUSMEnqueueMemcpy = 0;
 inline ur_result_t redefined_urUSMEnqueueMemcpy(void *pParams) {
   ++counter_urUSMEnqueueMemcpy;
-#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  auto params = *static_cast<ur_enqueue_usm_memcpy_params_t *>(pParams);
-  EXPECT_EQ(*params.pphEvent, nullptr);
-#endif
+  // #ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
+  //   auto params = *static_cast<ur_enqueue_usm_memcpy_params_t *>(pParams);
+  //   EXPECT_EQ(*params.pphEvent, nullptr);
+  // #endif
   return UR_RESULT_SUCCESS;
 }
 
 static thread_local size_t counter_urUSMEnqueueFill = 0;
 inline ur_result_t redefined_urUSMEnqueueFill(void *pParams) {
   ++counter_urUSMEnqueueFill;
-#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  auto params = *static_cast<ur_enqueue_usm_fill_params_t *>(pParams);
-  EXPECT_EQ(*params.pphEvent, nullptr);
-#endif
+  // #ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
+  //   auto params = *static_cast<ur_enqueue_usm_fill_params_t *>(pParams);
+  //   EXPECT_EQ(*params.pphEvent, nullptr);
+  // #endif
   return UR_RESULT_SUCCESS;
 }
 
 static thread_local size_t counter_urUSMEnqueuePrefetch = 0;
 inline ur_result_t redefined_urUSMEnqueuePrefetch(void *pParams) {
   ++counter_urUSMEnqueuePrefetch;
-#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  auto params = *static_cast<ur_enqueue_usm_prefetch_params_t *>(pParams);
-  EXPECT_EQ(*params.pphEvent, nullptr);
-#endif
+  // #ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
+  //   auto params = *static_cast<ur_enqueue_usm_prefetch_params_t *>(pParams);
+  //   EXPECT_EQ(*params.pphEvent, nullptr);
+  // #endif
   return UR_RESULT_SUCCESS;
 }
 
 static thread_local size_t counter_urUSMEnqueueMemAdvise = 0;
 inline ur_result_t redefined_urUSMEnqueueMemAdvise(void *pParams) {
   ++counter_urUSMEnqueueMemAdvise;
-#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
-  auto params = *static_cast<ur_enqueue_usm_advise_params_t *>(pParams);
-  EXPECT_EQ(*params.pphEvent, nullptr);
-#endif
+  // #ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
+  //   auto params = *static_cast<ur_enqueue_usm_advise_params_t *>(pParams);
+  //   EXPECT_EQ(*params.pphEvent, nullptr);
+  // #endif
   return UR_RESULT_SUCCESS;
 }
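How these helpers are consumed, sketched as a test body (the TEST_F name is hypothetical; Queue is the fixture member and the callback and counter names come from this header):

    TEST_F(FreeFunctionCommandsEventsTests, CountsKernelLaunches) {
      mock::getCallbacks().set_replace_callback(
          "urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunch);
      counter_urEnqueueKernelLaunch = 0;
      sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32},
                                [](sycl::nd_item<1>) {});
      ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
    }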