Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ class event_impl {
/// Clear the event state
void setStateIncomplete();

/// Set state as discarded.
void setStateDiscarded() { MState = HES_Discarded; }

/// Returns command that is associated with the event.
///
/// Scheduler mutex must be locked in read mode when this is called.
Expand Down
33 changes: 27 additions & 6 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,9 @@ void queue_impl::addEvent(const event &Event) {
addSharedEvent(Event);
}
// As long as the queue supports urQueueFinish we only need to store events
// for unenqueued commands and host tasks.
else if (MEmulateOOO || EImpl->getHandle() == nullptr) {
// for undiscarded, unenqueued commands and host tasks.
else if (MEmulateOOO ||
(EImpl->getHandle() == nullptr && !EImpl->isDiscarded())) {
std::weak_ptr<event_impl> EventWeakPtr{EImpl};
std::lock_guard<std::mutex> Lock{MMutex};
MEventsWeak.push_back(std::move(EventWeakPtr));
Expand Down Expand Up @@ -412,13 +413,24 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
template <typename HandlerFuncT>
event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &DepEvents,
bool CallerNeedsEvent,
HandlerFuncT HandlerFunc) {
return submit(
SubmissionInfo SI{};
if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
submit_without_event(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
HandlerFunc(CGH);
},
Self, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
return createDiscardedEvent();
}
return submit_with_event(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
HandlerFunc(CGH);
},
Self, /*CodeLoc*/ {}, /*SubmissionInfo*/ {}, /*IsTopCodeLoc*/ true);
Self, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
}

template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
Expand Down Expand Up @@ -446,7 +458,16 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
NestedCallsTracker tracker;
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
return createDiscardedEvent();

event DiscardedEvent = createDiscardedEvent();
if (isInOrder()) {
// Store the discarded event for proper in-order dependency tracking.
auto &EventToStoreIn = MGraph.expired()
? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
}
return DiscardedEvent;
}

event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
Expand All @@ -471,7 +492,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
return discard_or_return(ResEvent);
}
}
return submitWithHandler(Self, DepEvents, HandlerFunc);
return submitWithHandler(Self, DepEvents, CallerNeedsEvent, HandlerFunc);
}

void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ class queue_impl {
template <typename HandlerFuncT>
event submitWithHandler(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &DepEvents,
HandlerFuncT HandlerFunc);
bool CallerNeedsEvent, HandlerFuncT HandlerFunc);

/// Performs submission of a memory operation directly if scheduler can be
/// bypassed, or with a handler otherwise.
Expand Down
54 changes: 23 additions & 31 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
EnqueueResultT(EnqueueResultT::SyclEnqueueFailed, this, Res);
else {
MEvent->setEnqueued();
if (MShouldCompleteEventIfPossible &&
if (MShouldCompleteEventIfPossible && !MEvent->isDiscarded() &&
(MEvent->isHost() || MEvent->getHandle() == nullptr))
MEvent->setComplete();

Expand Down Expand Up @@ -3055,6 +3055,13 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &UREvent;
detail::EventImplPtr EventImpl = DiscardUrEvent ? nullptr : MEvent;

auto SetEventHandleOrDiscard = [&]() {
if (Event)
MEvent->setHandle(*Event);
else
MEvent->setStateDiscarded();
};

switch (MCommandGroup->getType()) {

case CGType::UpdateHost: {
Expand Down Expand Up @@ -3188,8 +3195,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<3 x N

return UR_RESULT_SUCCESS;
}
case CGType::FillUSM: {
Expand All @@ -3200,8 +3206,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::PrefetchUSM: {
Expand All @@ -3212,8 +3217,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::AdviseUSM: {
Expand All @@ -3225,8 +3229,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::Copy2DUSM: {
Expand All @@ -3238,8 +3241,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::Fill2DUSM: {
Expand All @@ -3251,8 +3253,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::Memset2DUSM: {
Expand All @@ -3264,8 +3265,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::CodeplayHostTask: {
Expand Down Expand Up @@ -3405,8 +3405,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
MQueue->getAdapter()->call<UrApiKind::urEnqueueNativeCommandExp>(
MQueue->getHandleRef(), InteropFreeFunc, &CustomOpData, ReqMems.size(),
ReqMems.data(), nullptr, RawEvents.size(), RawEvents.data(), Event);
if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::Barrier: {
Expand All @@ -3416,8 +3415,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
MEvent->setHostEnqueueTime();
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
MQueue->getHandleRef(), 0, nullptr, Event);
if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::BarrierWaitlist: {
Expand All @@ -3434,8 +3432,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
MEvent->setHostEnqueueTime();
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
MQueue->getHandleRef(), UrEvents.size(), &UrEvents[0], Event);
if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::ProfilingTag: {
Expand Down Expand Up @@ -3482,8 +3479,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Adapter->call<UrApiKind::urEventRelease>(PostTimestampBarrierEvent);
}

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::CopyToDeviceGlobal: {
Expand All @@ -3496,8 +3492,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::CopyFromDeviceGlobal: {
Expand All @@ -3511,8 +3506,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();
return UR_RESULT_SUCCESS;
}
case CGType::ReadWriteHostPipe: {
Expand Down Expand Up @@ -3543,8 +3537,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
CmdBufferCG->MCommandBuffer, MQueue->getHandleRef(),
RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0],
Event);
if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();

return Err;
}
Expand All @@ -3560,8 +3553,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
Result != UR_RESULT_SUCCESS)
return Result;

if (Event)
MEvent->setHandle(*Event);
SetEventHandleOrDiscard();

return UR_RESULT_SUCCESS;
}
Expand Down
37 changes: 37 additions & 0 deletions sycl/test-e2e/Regression/multi_thread_enqueue_discarded.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// REQUIRES: aspect-usm_device_allocations
// RUN: %{build} %threads_lib -o %t.out
// RUN: %{run} %t.out

// Regression test for a case where parallel work with enqueue functions
// discarding their results would cause implicit waits on discarded events.

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
#include <sycl/properties/all_properties.hpp>
#include <sycl/usm.hpp>
#include <thread>

void threadFunction(int) {
sycl::queue Q{{sycl::property::queue::in_order()}};

constexpr int Size = 128 * 128 * 128;
int *DevMem = sycl::malloc_device<int>(Size, Q);

sycl::ext::oneapi::experimental::submit(
Q, [&](sycl::handler &cgh) { cgh.fill<int>(DevMem, 1, Size); });
Q.wait_and_throw();

sycl::free(DevMem, Q);
}

int main() {
constexpr size_t NThreads = 2;
std::array<std::thread, NThreads> Threads;

for (size_t I = 0; I < NThreads; I++)
Threads[I] = std::thread{threadFunction, I};
for (size_t I = 0; I < NThreads; I++)
Threads[I].join();

return 0;
}
Loading
Loading