@@ -448,7 +448,7 @@ std::vector<ArgDesc> queue_impl::extractArgsAndReqsFromLambda(
448448}
449449
450450detail::EventImplPtr queue_impl::submit_kernel_direct_impl (
451- const NDRDescT &NDRDesc, const v1::KernelRuntimeInfo &KRInfo,
451+ NDRDescT &NDRDesc, const v1::KernelRuntimeInfo &KRInfo,
452452 bool CallerNeedsEvent, const detail::code_location &CodeLoc,
453453 bool IsTopCodeLoc) {
454454
@@ -457,37 +457,58 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
457457
458458 SubmitCommandFuncType SubmitKernelFunc =
459459 [&](detail::CG::StorageInitHelper &CGData) -> EventImplPtr {
460- std::unique_ptr<detail::CG> CommandGroup;
461460 std::vector<detail::ArgDesc> Args;
462- std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
463- std::vector<std::shared_ptr<const void >> AuxiliaryResources;
464-
465- Args = extractArgsAndReqsFromLambda (
466- KRInfo.GetKernelFuncPtr (),
467- KRInfo.DeviceKernelInfoPtr ()->ParamDescGetter ,
468- KRInfo.DeviceKernelInfoPtr ()->NumParams );
469-
470- CommandGroup.reset (new detail::CGExecKernel (
471- std::move (NDRDesc), KRInfo.HostKernel (),
472- nullptr , // MKernel
473- nullptr , // MKernelBundle
474- std::move (CGData), std::move (Args),
475- toKernelNameStrT (KRInfo.KernelName ()), *KRInfo.DeviceKernelInfoPtr (),
476- std::move (StreamStorage), std::move (AuxiliaryResources),
477- detail::CGType::Kernel, UR_KERNEL_CACHE_CONFIG_DEFAULT,
478- false , // MKernelIsCooperative
479- false , // MKernelUsesClusterLaunch
480- 0 , // MKernelWorkGroupMemorySize
481- CodeLoc));
482- CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
483-
484- // TODO DiscardEvent should include a check for requirements list
485- // once accessors are implemented
486461 bool DiscardEvent = !CallerNeedsEvent && supportsDiscardingPiEvents ();
487462
488- EventImplPtr EventImpl = detail::Scheduler::getInstance ().addCG (
489- std::move (CommandGroup), *this , !DiscardEvent);
490- return EventImpl;
463+ // Both the bypass and the scheduler path consume Args; extract before branching.
464+ Args = extractArgsAndReqsFromLambda (
465+ KRInfo.GetKernelFuncPtr (),
466+ KRInfo.DeviceKernelInfoPtr ()->ParamDescGetter ,
467+ KRInfo.DeviceKernelInfoPtr ()->NumParams );
468+
469+ bool SchedulerBypass = detail::Scheduler::areEventsSafeForSchedulerBypass (
470+ CGData.MEvents , getContextImpl ());
471+
472+ if (SchedulerBypass) {
473+ std::vector<ur_event_handle_t > RawEvents =
474+ detail::Command::getUrEvents (CGData.MEvents , this , false );
475+
476+ std::shared_ptr<detail::event_impl> ResultEvent =
477+ DiscardEvent ? nullptr
478+ : detail::event_impl::create_device_event (*this );
479+
480+ enqueueImpKernel (
481+ *this , NDRDesc, Args, nullptr , nullptr ,
482+ toKernelNameStrT (KRInfo.KernelName ()), *KRInfo.DeviceKernelInfoPtr (),
483+ RawEvents, ResultEvent.get (), nullptr , UR_KERNEL_CACHE_CONFIG_DEFAULT,
484+ false , false , 0 , nullptr );
485+
486+ return ResultEvent;
487+ } else {
488+ std::unique_ptr<detail::CG> CommandGroup;
489+ std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
490+ std::vector<std::shared_ptr<const void >> AuxiliaryResources;
491+ CommandGroup.reset (new detail::CGExecKernel (
492+ std::move (NDRDesc), KRInfo.HostKernel (),
493+ nullptr , // MKernel
494+ nullptr , // MKernelBundle
495+ std::move (CGData), std::move (Args),
496+ toKernelNameStrT (KRInfo.KernelName ()), *KRInfo.DeviceKernelInfoPtr (),
497+ std::move (StreamStorage), std::move (AuxiliaryResources),
498+ detail::CGType::Kernel, UR_KERNEL_CACHE_CONFIG_DEFAULT,
499+ false , // MKernelIsCooperative
500+ false , // MKernelUsesClusterLaunch
501+ 0 , // MKernelWorkGroupMemorySize
502+ CodeLoc));
503+ CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
504+
505+ // TODO DiscardEvent should include a check for requirements list
506+ // once accessors are implemented
507+
508+ EventImplPtr EventImpl = detail::Scheduler::getInstance ().addCG (
509+ std::move (CommandGroup), *this , !DiscardEvent);
510+ return EventImpl;
511+ }
491512 };
492513
493514 return submit_direct (CallerNeedsEvent, SubmitKernelFunc);
@@ -539,12 +560,12 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
539560 EventImplPtr EventImpl = SubmitCommandFunc (CGData);
540561
541562 // Sync with the last event for in order queue
542- if (isInOrder () && !EventImpl->isDiscarded ()) {
563+ if (isInOrder () && EventImpl && !EventImpl->isDiscarded ()) {
543564 LastEvent = EventImpl;
544565 }
545566
546567 // Barrier and un-enqueued commands synchronization for out or order queue
547- if (!isInOrder () && !EventImpl->isEnqueued ()) {
568+ if (!isInOrder () && EventImpl && !EventImpl->isEnqueued ()) {
548569 MDefaultGraphDeps.UnenqueuedCmdEvents .push_back (EventImpl);
549570 }
550571
0 commit comments