@@ -456,7 +456,8 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
456456 assert (!KRInfo.DeviceKernelInfoPtr ()->HasSpecialCaptures );
457457
458458 SubmitCommandFuncType SubmitKernelFunc =
459- [&](detail::CG::StorageInitHelper &CGData) -> EventImplPtr {
459+ [&](detail::CG::StorageInitHelper &CGData)
460+ -> std::pair<EventImplPtr, bool > {
460461 std::vector<detail::ArgDesc> Args;
461462 bool DiscardEvent = !CallerNeedsEvent && supportsDiscardingPiEvents ();
462463
@@ -471,13 +472,32 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
471472 DiscardEvent ? nullptr
472473 : detail::event_impl::create_device_event (*this );
473474
475+ if (!DiscardEvent) {
476+ ResultEvent->setWorkerQueue (weak_from_this ());
477+ ResultEvent->setStateIncomplete ();
478+ ResultEvent->setSubmissionTime ();
479+ }
480+
474481 enqueueImpKernel (
475482 *this , NDRDesc, Args, nullptr , nullptr ,
476483 toKernelNameStrT (KRInfo.KernelName ()), *KRInfo.DeviceKernelInfoPtr (),
477484 RawEvents, ResultEvent.get (), nullptr , UR_KERNEL_CACHE_CONFIG_DEFAULT,
478- false , false , 0 , nullptr );
485+ false , false , 0 , nullptr , KRInfo.GetKernelFuncPtr (),
486+ KRInfo.DeviceKernelInfoPtr ()->NumParams ,
487+ KRInfo.DeviceKernelInfoPtr ()->ParamDescGetter , false );
488+
489+ if (!DiscardEvent) {
490+ ResultEvent->setEnqueued ();
491+ // connect returned event with dependent events
492+ if (!isInOrder ()) {
493+ // MEvents is not used anymore, so can move.
494+ ResultEvent->getPreparedDepsEvents () = std::move (CGData.MEvents );
495+ // ResultEvent is local for current thread, no need to lock.
496+ ResultEvent->cleanDepEventsThroughOneLevelUnlocked ();
497+ }
498+ }
479499
480- return ResultEvent;
500+ return { ResultEvent, true } ;
481501 } else {
482502 std::unique_ptr<detail::CG> CommandGroup;
483503 std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
@@ -507,7 +527,7 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
507527
508528 EventImplPtr EventImpl = detail::Scheduler::getInstance ().addCG (
509529 std::move (CommandGroup), *this , !DiscardEvent);
510- return EventImpl;
530+ return {DiscardEvent ? nullptr : EventImpl, false } ;
511531 }
512532 };
513533
@@ -557,11 +577,15 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
557577 }
558578 }
559579
560- EventImplPtr EventImpl = SubmitCommandFunc (CGData);
580+ auto [ EventImpl, SchedulerBypass] = SubmitCommandFunc (CGData);
561581
562582 // Sync with the last event for in order queue
563- if (isInOrder () && EventImpl && !EventImpl->isDiscarded ()) {
564- LastEvent = EventImpl;
583+ if (isInOrder ()) {
584+ if (SchedulerBypass) {
585+ LastEvent = nullptr ;
586+ } else if (EventImpl) {
587+ LastEvent = EventImpl;
588+ }
565589 }
566590
567591 // Barrier and un-enqueued commands synchronization for out of order queue
0 commit comments