Skip to content

Commit ae29eb5

Browse files
committed
[SYCL] Fix discarded enqueue function event markings
This commit fixes an issue where memory operations enqueued through the enqueue free functions would not correctly mark the resulting events as discarded, breaking in-order barrier assumptions. Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 814290d commit ae29eb5

File tree

5 files changed

+86
-6
lines changed

5 files changed

+86
-6
lines changed

sycl/source/detail/event_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ class event_impl {
152152
/// Clear the event state
153153
void setStateIncomplete();
154154

155+
/// Set state as discarded.
156+
void setStateDiscarded() { MState = HES_Discarded; }
157+
155158
/// Returns command that is associated with the event.
156159
///
157160
/// Scheduler mutex must be locked in read mode when this is called.

sycl/source/detail/queue_impl.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -412,13 +412,24 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
412412
template <typename HandlerFuncT>
413413
event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
414414
const std::vector<event> &DepEvents,
415+
bool CallerNeedsEvent,
415416
HandlerFuncT HandlerFunc) {
416-
return submit(
417+
SubmissionInfo SI{};
418+
if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
419+
submit_without_event(
420+
[&](handler &CGH) {
421+
CGH.depends_on(DepEvents);
422+
HandlerFunc(CGH);
423+
},
424+
Self, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
425+
return createDiscardedEvent();
426+
}
427+
return submit_with_event(
417428
[&](handler &CGH) {
418429
CGH.depends_on(DepEvents);
419430
HandlerFunc(CGH);
420431
},
421-
Self, /*CodeLoc*/ {}, /*SubmissionInfo*/ {}, /*IsTopCodeLoc*/ true);
432+
Self, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
422433
}
423434

424435
template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
@@ -446,7 +457,16 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
446457
NestedCallsTracker tracker;
447458
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
448459
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
449-
return createDiscardedEvent();
460+
461+
event DiscardedEvent = createDiscardedEvent();
462+
if (isInOrder()) {
463+
// Store the discarded event for proper in-order dependency tracking.
464+
auto &EventToStoreIn = MGraph.expired()
465+
? MDefaultGraphDeps.LastEventPtr
466+
: MExtGraphDeps.LastEventPtr;
467+
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
468+
}
469+
return DiscardedEvent;
450470
}
451471

452472
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
@@ -471,7 +491,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
471491
return discard_or_return(ResEvent);
472492
}
473493
}
474-
return submitWithHandler(Self, DepEvents, HandlerFunc);
494+
return submitWithHandler(Self, DepEvents, CallerNeedsEvent, HandlerFunc);
475495
}
476496

477497
void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,

sycl/source/detail/queue_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ class queue_impl {
868868
template <typename HandlerFuncT>
869869
event submitWithHandler(const std::shared_ptr<queue_impl> &Self,
870870
const std::vector<event> &DepEvents,
871-
HandlerFuncT HandlerFunc);
871+
bool CallerNeedsEvent, HandlerFuncT HandlerFunc);
872872

873873
/// Performs submission of a memory operation directly if scheduler can be
874874
/// bypassed, or with a handler otherwise.

sycl/source/detail/scheduler/commands.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
956956
EnqueueResultT(EnqueueResultT::SyclEnqueueFailed, this, Res);
957957
else {
958958
MEvent->setEnqueued();
959-
if (MShouldCompleteEventIfPossible &&
959+
if (MShouldCompleteEventIfPossible && !MEvent->isDiscarded() &&
960960
(MEvent->isHost() || MEvent->getHandle() == nullptr))
961961
MEvent->setComplete();
962962

@@ -3055,6 +3055,10 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
30553055
ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &UREvent;
30563056
detail::EventImplPtr EventImpl = DiscardUrEvent ? nullptr : MEvent;
30573057

3058+
// If we are discarding the UR event, we also need to mark the result event.
3059+
if (DiscardUrEvent)
3060+
MEvent->setStateDiscarded();
3061+
30583062
switch (MCommandGroup->getType()) {
30593063

30603064
case CGType::UpdateHost: {

sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
//===----------------------------------------------------------------------===//
88
// Tests the behavior of enqueue free functions when events can be discarded.
99

10+
#include "detail/event_impl.hpp"
11+
#include "detail/queue_impl.hpp"
1012
#include "sycl/platform.hpp"
1113
#include <helpers/TestKernel.hpp>
1214
#include <helpers/UrMock.hpp>
@@ -107,6 +109,13 @@ class EnqueueFunctionsEventsTests : public ::testing::Test {
107109
queue Q;
108110
};
109111

112+
inline void CheckLastEventDiscarded(sycl::queue &Q) {
113+
auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q);
114+
event LastEvent = QueueImplPtr->getLastEvent();
115+
auto LastEventImplPtr = sycl::detail::getSyclObjImpl(LastEvent);
116+
ASSERT_TRUE(LastEventImplPtr->isDiscarded());
117+
}
118+
110119
TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) {
111120
mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",
112121
&redefined_urEnqueueKernelLaunch);
@@ -116,6 +125,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) {
116125
});
117126

118127
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
128+
129+
CheckLastEventDiscarded(Q);
119130
}
120131

121132
TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) {
@@ -125,6 +136,8 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) {
125136
oneapiext::single_task<TestKernel<>>(Q, []() {});
126137

127138
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
139+
140+
CheckLastEventDiscarded(Q);
128141
}
129142

130143
TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) {
@@ -144,6 +157,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) {
144157
[&](handler &CGH) { oneapiext::single_task(CGH, Kernel); });
145158

146159
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
160+
161+
CheckLastEventDiscarded(Q);
147162
}
148163

149164
TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) {
@@ -163,6 +178,8 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) {
163178
oneapiext::single_task(Q, Kernel);
164179

165180
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
181+
182+
CheckLastEventDiscarded(Q);
166183
}
167184

168185
TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) {
@@ -174,6 +191,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) {
174191
});
175192

176193
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
194+
195+
CheckLastEventDiscarded(Q);
177196
}
178197

179198
TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) {
@@ -183,6 +202,8 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) {
183202
oneapiext::parallel_for<TestKernel<>>(Q, range<1>{32}, [](item<1>) {});
184203

185204
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
205+
206+
CheckLastEventDiscarded(Q);
186207
}
187208

188209
TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) {
@@ -203,6 +224,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) {
203224
});
204225

205226
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
227+
228+
CheckLastEventDiscarded(Q);
206229
}
207230

208231
TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) {
@@ -222,6 +245,8 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) {
222245
oneapiext::parallel_for(Q, range<1>{32}, Kernel);
223246

224247
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
248+
249+
CheckLastEventDiscarded(Q);
225250
}
226251

227252
TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) {
@@ -234,6 +259,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) {
234259
});
235260

236261
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
262+
263+
CheckLastEventDiscarded(Q);
237264
}
238265

239266
TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) {
@@ -244,6 +271,8 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) {
244271
[](nd_item<1>) {});
245272

246273
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
274+
275+
CheckLastEventDiscarded(Q);
247276
}
248277

249278
TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) {
@@ -264,6 +293,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) {
264293
});
265294

266295
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
296+
297+
CheckLastEventDiscarded(Q);
267298
}
268299

269300
TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) {
@@ -283,6 +314,8 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) {
283314
oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel);
284315

285316
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
317+
318+
CheckLastEventDiscarded(Q);
286319
}
287320

288321
TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) {
@@ -299,6 +332,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) {
299332

300333
ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1});
301334

335+
CheckLastEventDiscarded(Q);
336+
302337
free(Src, Q);
303338
free(Dst, Q);
304339
}
@@ -315,6 +350,8 @@ TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) {
315350

316351
ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1});
317352

353+
CheckLastEventDiscarded(Q);
354+
318355
free(Src, Q);
319356
free(Dst, Q);
320357
}
@@ -332,6 +369,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) {
332369

333370
ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1});
334371

372+
CheckLastEventDiscarded(Q);
373+
335374
free(Src, Q);
336375
free(Dst, Q);
337376
}
@@ -348,6 +387,8 @@ TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) {
348387

349388
ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1});
350389

390+
CheckLastEventDiscarded(Q);
391+
351392
free(Src, Q);
352393
free(Dst, Q);
353394
}
@@ -365,6 +406,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) {
365406

366407
ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1});
367408

409+
CheckLastEventDiscarded(Q);
410+
368411
free(Dst, Q);
369412
}
370413

@@ -379,6 +422,8 @@ TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) {
379422

380423
ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1});
381424

425+
CheckLastEventDiscarded(Q);
426+
382427
free(Dst, Q);
383428
}
384429

@@ -394,6 +439,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) {
394439

395440
ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1});
396441

442+
CheckLastEventDiscarded(Q);
443+
397444
free(Dst, Q);
398445
}
399446

@@ -408,6 +455,8 @@ TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) {
408455

409456
ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1});
410457

458+
CheckLastEventDiscarded(Q);
459+
411460
free(Dst, Q);
412461
}
413462

@@ -424,6 +473,8 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) {
424473

425474
ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1});
426475

476+
CheckLastEventDiscarded(Q);
477+
427478
free(Dst, Q);
428479
}
429480

@@ -438,6 +489,8 @@ TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) {
438489

439490
ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1});
440491

492+
CheckLastEventDiscarded(Q);
493+
441494
free(Dst, Q);
442495
}
443496

0 commit comments

Comments
 (0)