Skip to content

Commit 7b95fb1

Browse files
committed
Added scheduler-bypass flow to no-handler
1 parent 63d1345 commit 7b95fb1

File tree

2 files changed

+80
-44
lines changed

2 files changed

+80
-44
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ std::vector<ArgDesc> queue_impl::extractArgsAndReqsFromLambda(
448448
}
449449

450450
detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
451-
const NDRDescT &NDRDesc, const v1::KernelRuntimeInfo &KRInfo,
451+
NDRDescT &NDRDesc, const v1::KernelRuntimeInfo &KRInfo,
452452
bool CallerNeedsEvent, const detail::code_location &CodeLoc,
453453
bool IsTopCodeLoc) {
454454

@@ -457,37 +457,58 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
457457

458458
SubmitCommandFuncType SubmitKernelFunc =
459459
[&](detail::CG::StorageInitHelper &CGData) -> EventImplPtr {
460-
std::unique_ptr<detail::CG> CommandGroup;
461460
std::vector<detail::ArgDesc> Args;
462-
std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
463-
std::vector<std::shared_ptr<const void>> AuxiliaryResources;
464-
465-
Args = extractArgsAndReqsFromLambda(
466-
KRInfo.GetKernelFuncPtr(),
467-
KRInfo.DeviceKernelInfoPtr()->ParamDescGetter,
468-
KRInfo.DeviceKernelInfoPtr()->NumParams);
469-
470-
CommandGroup.reset(new detail::CGExecKernel(
471-
std::move(NDRDesc), KRInfo.HostKernel(),
472-
nullptr, // MKernel
473-
nullptr, // MKernelBundle
474-
std::move(CGData), std::move(Args),
475-
toKernelNameStrT(KRInfo.KernelName()), *KRInfo.DeviceKernelInfoPtr(),
476-
std::move(StreamStorage), std::move(AuxiliaryResources),
477-
detail::CGType::Kernel, UR_KERNEL_CACHE_CONFIG_DEFAULT,
478-
false, // MKernelIsCooperative
479-
false, // MKernelUsesClusterLaunch
480-
0, // MKernelWorkGroupMemorySize
481-
CodeLoc));
482-
CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
483-
484-
// TODO DiscardEvent should include a check for requirements list
485-
// once accessors are implemented
486461
bool DiscardEvent = !CallerNeedsEvent && supportsDiscardingPiEvents();
487462

488-
EventImplPtr EventImpl = detail::Scheduler::getInstance().addCG(
489-
std::move(CommandGroup), *this, !DiscardEvent);
490-
return EventImpl;
463+
bool SchedulerBypass = detail::Scheduler::areEventsSafeForSchedulerBypass(
464+
CGData.MEvents, getContextImpl());
465+
466+
if (SchedulerBypass) {
467+
std::vector<ur_event_handle_t> RawEvents =
468+
detail::Command::getUrEvents(CGData.MEvents, this, false);
469+
470+
std::shared_ptr<detail::event_impl> ResultEvent =
471+
DiscardEvent ? nullptr
472+
: detail::event_impl::create_device_event(*this);
473+
474+
enqueueImpKernel(
475+
*this, NDRDesc, Args, nullptr, nullptr,
476+
toKernelNameStrT(KRInfo.KernelName()), *KRInfo.DeviceKernelInfoPtr(),
477+
RawEvents, ResultEvent.get(), nullptr, UR_KERNEL_CACHE_CONFIG_DEFAULT,
478+
false, false, 0, nullptr);
479+
480+
return ResultEvent;
481+
} else {
482+
std::unique_ptr<detail::CG> CommandGroup;
483+
std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
484+
std::vector<std::shared_ptr<const void>> AuxiliaryResources;
485+
486+
Args = extractArgsAndReqsFromLambda(
487+
KRInfo.GetKernelFuncPtr(),
488+
KRInfo.DeviceKernelInfoPtr()->ParamDescGetter,
489+
KRInfo.DeviceKernelInfoPtr()->NumParams);
490+
491+
CommandGroup.reset(new detail::CGExecKernel(
492+
std::move(NDRDesc), KRInfo.HostKernel(),
493+
nullptr, // MKernel
494+
nullptr, // MKernelBundle
495+
std::move(CGData), std::move(Args),
496+
toKernelNameStrT(KRInfo.KernelName()), *KRInfo.DeviceKernelInfoPtr(),
497+
std::move(StreamStorage), std::move(AuxiliaryResources),
498+
detail::CGType::Kernel, UR_KERNEL_CACHE_CONFIG_DEFAULT,
499+
false, // MKernelIsCooperative
500+
false, // MKernelUsesClusterLaunch
501+
0, // MKernelWorkGroupMemorySize
502+
CodeLoc));
503+
CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
504+
505+
// TODO DiscardEvent should include a check for requirements list
506+
// once accessors are implemented
507+
508+
EventImplPtr EventImpl = detail::Scheduler::getInstance().addCG(
509+
std::move(CommandGroup), *this, !DiscardEvent);
510+
return EventImpl;
511+
}
491512
};
492513

493514
return submit_direct(CallerNeedsEvent, SubmitKernelFunc);
@@ -539,12 +560,12 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
539560
EventImplPtr EventImpl = SubmitCommandFunc(CGData);
540561

541562
// Sync with the last event for in order queue
542-
if (isInOrder() && !EventImpl->isDiscarded()) {
563+
if (isInOrder() && EventImpl && !EventImpl->isDiscarded()) {
543564
LastEvent = EventImpl;
544565
}
545566

546567
// Barrier and un-enqueued commands synchronization for out or order queue
547-
if (!isInOrder() && !EventImpl->isEnqueued()) {
568+
if (!isInOrder() && EventImpl && !EventImpl->isEnqueued()) {
548569
MDefaultGraphDeps.UnenqueuedCmdEvents.push_back(EventImpl);
549570
}
550571

sycl/source/detail/queue_impl.hpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -369,46 +369,61 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
369369
event submit_kernel_direct_with_event(
370370
nd_range<1> Range, const detail::v1::KernelRuntimeInfo &KRInfo,
371371
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
372-
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
373-
NDRDescT{Range}, KRInfo, true, CodeLoc, IsTopCodeLoc);
372+
373+
NDRDescT NDRDesc{Range};
374+
375+
detail::EventImplPtr EventImpl =
376+
submit_kernel_direct_impl(NDRDesc, KRInfo, true, CodeLoc, IsTopCodeLoc);
374377
return createSyclObjFromImpl<event>(EventImpl);
375378
}
376379

377380
event submit_kernel_direct_with_event(
378381
nd_range<2> Range, const detail::v1::KernelRuntimeInfo &KRInfo,
379382
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
380-
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
381-
NDRDescT{Range}, KRInfo, true, CodeLoc, IsTopCodeLoc);
383+
384+
NDRDescT NDRDesc{Range};
385+
386+
detail::EventImplPtr EventImpl =
387+
submit_kernel_direct_impl(NDRDesc, KRInfo, true, CodeLoc, IsTopCodeLoc);
382388
return createSyclObjFromImpl<event>(EventImpl);
383389
}
384390

385391
event submit_kernel_direct_with_event(
386392
nd_range<3> Range, const detail::v1::KernelRuntimeInfo &KRInfo,
387393
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
388-
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
389-
NDRDescT{Range}, KRInfo, true, CodeLoc, IsTopCodeLoc);
394+
395+
NDRDescT NDRDesc{Range};
396+
397+
detail::EventImplPtr EventImpl =
398+
submit_kernel_direct_impl(NDRDesc, KRInfo, true, CodeLoc, IsTopCodeLoc);
390399
return createSyclObjFromImpl<event>(EventImpl);
391400
}
392401

393402
void submit_kernel_direct_without_event(
394403
nd_range<1> Range, const detail::v1::KernelRuntimeInfo &KRInfo,
395404
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
396-
submit_kernel_direct_impl(NDRDescT{Range}, KRInfo, false, CodeLoc,
397-
IsTopCodeLoc);
405+
406+
NDRDescT NDRDesc{Range};
407+
408+
submit_kernel_direct_impl(NDRDesc, KRInfo, false, CodeLoc, IsTopCodeLoc);
398409
}
399410

400411
void submit_kernel_direct_without_event(
401412
nd_range<2> Range, const detail::v1::KernelRuntimeInfo &KRInfo,
402413
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
403-
submit_kernel_direct_impl(NDRDescT{Range}, KRInfo, false, CodeLoc,
404-
IsTopCodeLoc);
414+
415+
NDRDescT NDRDesc{Range};
416+
417+
submit_kernel_direct_impl(NDRDesc, KRInfo, false, CodeLoc, IsTopCodeLoc);
405418
}
406419

407420
void submit_kernel_direct_without_event(
408421
nd_range<3> Range, const detail::v1::KernelRuntimeInfo &KRInfo,
409422
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
410-
submit_kernel_direct_impl(NDRDescT{Range}, KRInfo, false, CodeLoc,
411-
IsTopCodeLoc);
423+
424+
NDRDescT NDRDesc{Range};
425+
426+
submit_kernel_direct_impl(NDRDesc, KRInfo, false, CodeLoc, IsTopCodeLoc);
412427
}
413428

414429
void submit_without_event(const detail::type_erased_cgfo_ty &CGF,
@@ -954,7 +969,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
954969
///
955970
/// \return a SYCL event representing submitted command group or nullptr.
956971
detail::EventImplPtr submit_kernel_direct_impl(
957-
const NDRDescT &NDRDesc, const v1::KernelRuntimeInfo &KRInfo,
972+
NDRDescT &NDRDesc, const v1::KernelRuntimeInfo &KRInfo,
958973
bool CallerNeedsEvent, const detail::code_location &CodeLoc,
959974
bool IsTopCodeLoc);
960975

0 commit comments

Comments
 (0)