Skip to content

Commit d28df15

Browse files
committed
[SYCL] Event-less APIs synchronization with the scheduler
Commands submitted to the scheduler need to unconditionally be associated with an event (for both event and event-less APIs). This is because some commands might already be scheduled and waiting for the submission, and a newly submitted command need to return an event which can be used by the in-order type queue to properly order the commands.
1 parent 0433e4d commit d28df15

File tree

3 files changed

+71
-28
lines changed

3 files changed

+71
-28
lines changed

sycl/source/handler.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -984,15 +984,15 @@ event handler::finalize() {
984984
#endif
985985
}
986986

987-
bool DiscardEvent = !impl->MEventNeeded && Queue &&
988-
Queue->supportsDiscardingPiEvents() &&
989-
CommandGroup->getRequirements().size() == 0;
990-
987+
// Regardless of whether an event has been requested, the scheduler
988+
// needs to generate an event so the commands are properly ordered
989+
// (for in-order queue) and synchronized with a barrier (for out-of-order
990+
// queue)
991991
detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG(
992-
std::move(CommandGroup), *Queue, !DiscardEvent);
992+
std::move(CommandGroup), *Queue, true);
993993

994994
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
995-
return DiscardEvent ? nullptr : Event;
995+
return Event;
996996
#else
997997
return detail::createSyclObjFromImpl<event>(Event);
998998
#endif

sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,40 +26,30 @@ inline ur_result_t after_urKernelGetInfo(void *pParams) {
2626
static thread_local size_t counter_urEnqueueKernelLaunch = 0;
2727
inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) {
2828
++counter_urEnqueueKernelLaunch;
29-
auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
30-
EXPECT_EQ(*params.pphEvent, nullptr);
3129
return UR_RESULT_SUCCESS;
3230
}
3331

3432
static thread_local size_t counter_urUSMEnqueueMemcpy = 0;
3533
inline ur_result_t redefined_urUSMEnqueueMemcpy(void *pParams) {
3634
++counter_urUSMEnqueueMemcpy;
37-
auto params = *static_cast<ur_enqueue_usm_memcpy_params_t *>(pParams);
38-
EXPECT_EQ(*params.pphEvent, nullptr);
3935
return UR_RESULT_SUCCESS;
4036
}
4137

4238
static thread_local size_t counter_urUSMEnqueueFill = 0;
4339
inline ur_result_t redefined_urUSMEnqueueFill(void *pParams) {
4440
++counter_urUSMEnqueueFill;
45-
auto params = *static_cast<ur_enqueue_usm_fill_params_t *>(pParams);
46-
EXPECT_EQ(*params.pphEvent, nullptr);
4741
return UR_RESULT_SUCCESS;
4842
}
4943

5044
static thread_local size_t counter_urUSMEnqueuePrefetch = 0;
5145
inline ur_result_t redefined_urUSMEnqueuePrefetch(void *pParams) {
5246
++counter_urUSMEnqueuePrefetch;
53-
auto params = *static_cast<ur_enqueue_usm_prefetch_params_t *>(pParams);
54-
EXPECT_EQ(*params.pphEvent, nullptr);
5547
return UR_RESULT_SUCCESS;
5648
}
5749

5850
static thread_local size_t counter_urUSMEnqueueMemAdvise = 0;
5951
inline ur_result_t redefined_urUSMEnqueueMemAdvise(void *pParams) {
6052
++counter_urUSMEnqueueMemAdvise;
61-
auto params = *static_cast<ur_enqueue_usm_advise_params_t *>(pParams);
62-
EXPECT_EQ(*params.pphEvent, nullptr);
6353
return UR_RESULT_SUCCESS;
6454
}
6555

sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp

Lines changed: 65 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
using namespace sycl;
2323

24+
namespace oneapiext = ext::oneapi::experimental;
25+
2426
size_t GEventsWaitCounter = 0;
2527

2628
inline ur_result_t redefinedEventsWaitWithBarrier(void *pParams) {
@@ -49,19 +51,19 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) {
4951
}
5052

5153
enum class CommandType { KERNEL = 1, MEMSET = 2 };
52-
std::vector<std::pair<CommandType, size_t>> ExecutedCommands;
54+
std::vector<std::tuple<CommandType, size_t, size_t>> ExecutedCommands;
5355

5456
inline ur_result_t customEnqueueKernelLaunch(void *pParams) {
5557
auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
56-
ExecutedCommands.push_back(
57-
{CommandType::KERNEL, *params.pnumEventsInWaitList});
58+
ExecutedCommands.push_back({CommandType::KERNEL, *params.pnumEventsInWaitList,
59+
*params.ppGlobalWorkSize[0]});
5860
return UR_RESULT_SUCCESS;
5961
}
6062

6163
inline ur_result_t customEnqueueUSMFill(void *pParams) {
6264
auto params = *static_cast<ur_enqueue_usm_fill_params_t *>(pParams);
6365
ExecutedCommands.push_back(
64-
{CommandType::MEMSET, *params.pnumEventsInWaitList});
66+
{CommandType::MEMSET, *params.pnumEventsInWaitList, 0});
6567
return UR_RESULT_SUCCESS;
6668
}
6769

@@ -112,10 +114,12 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) {
112114
InOrderQueue.wait();
113115

114116
ASSERT_EQ(ExecutedCommands.size(), 2u);
115-
EXPECT_EQ(ExecutedCommands[0].first /*CommandType*/, CommandType::MEMSET);
116-
EXPECT_EQ(ExecutedCommands[0].second /*EventsCount*/, 0u);
117-
EXPECT_EQ(ExecutedCommands[1].first /*CommandType*/, CommandType::KERNEL);
118-
EXPECT_EQ(ExecutedCommands[1].second /*EventsCount*/, 0u);
117+
EXPECT_EQ(std::get<0>(ExecutedCommands[0]) /*CommandType*/,
118+
CommandType::MEMSET);
119+
EXPECT_EQ(std::get<1>(ExecutedCommands[0]) /*EventsCount*/, 0u);
120+
EXPECT_EQ(std::get<0>(ExecutedCommands[1]) /*CommandType*/,
121+
CommandType::KERNEL);
122+
EXPECT_EQ(std::get<1>(ExecutedCommands[1]) /*EventsCount*/, 0u);
119123
}
120124

121125
TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) {
@@ -157,8 +161,57 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) {
157161
InOrderQueue.wait();
158162

159163
ASSERT_EQ(ExecutedCommands.size(), 2u);
160-
EXPECT_EQ(ExecutedCommands[0].first /*CommandType*/, CommandType::MEMSET);
161-
EXPECT_EQ(ExecutedCommands[0].second /*EventsCount*/, 0u);
162-
EXPECT_EQ(ExecutedCommands[1].first /*CommandType*/, CommandType::KERNEL);
163-
EXPECT_EQ(ExecutedCommands[1].second /*EventsCount*/, 0u);
164+
EXPECT_EQ(std::get<0>(ExecutedCommands[0]) /*CommandType*/,
165+
CommandType::MEMSET);
166+
EXPECT_EQ(std::get<1>(ExecutedCommands[0]) /*EventsCount*/, 0u);
167+
EXPECT_EQ(std::get<0>(ExecutedCommands[1]) /*CommandType*/,
168+
CommandType::KERNEL);
169+
EXPECT_EQ(std::get<1>(ExecutedCommands[1]) /*EventsCount*/, 0u);
164170
}
171+
172+
TEST_F(SchedulerTest, InOrderQueueCrossDepsEnqueueFunctions) {
173+
ExecutedCommands.clear();
174+
sycl::unittest::UrMock<> Mock;
175+
mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch",
176+
&customEnqueueKernelLaunch);
177+
178+
sycl::platform Plt = sycl::platform();
179+
180+
context Ctx{Plt};
181+
queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()};
182+
183+
std::mutex CvMutex;
184+
std::condition_variable Cv;
185+
bool ready = false;
186+
187+
InOrderQueue.submit([&](sycl::handler &CGH) {
188+
CGH.host_task([&] {
189+
std::unique_lock<std::mutex> lk(CvMutex);
190+
Cv.wait(lk, [&ready] { return ready; });
191+
});
192+
});
193+
194+
oneapiext::nd_launch<TestKernel>(
195+
InOrderQueue, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {});
196+
197+
oneapiext::nd_launch<TestKernel>(
198+
InOrderQueue, nd_range<1>{range<1>{64}, range<1>{32}}, [](nd_item<1>) {});
199+
200+
{
201+
std::unique_lock<std::mutex> lk(CvMutex);
202+
ready = true;
203+
}
204+
Cv.notify_one();
205+
206+
InOrderQueue.wait();
207+
208+
ASSERT_EQ(ExecutedCommands.size(), 2u);
209+
EXPECT_EQ(std::get<0>(ExecutedCommands[0]) /*CommandType*/,
210+
CommandType::KERNEL);
211+
EXPECT_EQ(std::get<1>(ExecutedCommands[0]) /*EventsCount*/, 0u);
212+
EXPECT_EQ(std::get<2>(ExecutedCommands[0]) /*GlobalWorkSize*/, 32u);
213+
EXPECT_EQ(std::get<0>(ExecutedCommands[1]) /*CommandType*/,
214+
CommandType::KERNEL);
215+
EXPECT_EQ(std::get<1>(ExecutedCommands[1]) /*EventsCount*/, 0u);
216+
EXPECT_EQ(std::get<2>(ExecutedCommands[1]) /*GlobalWorkSize*/, 64u);
217+
}

0 commit comments

Comments
 (0)