@@ -99,6 +99,29 @@ struct urMultiQueueLaunchMemcpyTest : uur::urMultiDeviceContextTestTemplate<1>,
9999 UUR_RETURN_ON_FATAL_FAILURE (
100100 uur::urMultiDeviceContextTestTemplate<1 >::TearDown ());
101101 }
102+
103+ void runBackgroundCheck (std::vector<uur::raii::Event> &Events) {
104+ std::vector<std::thread> threads;
105+ for (size_t i = 0 ; i < Events.size (); i++) {
106+ threads.emplace_back ([&, i] {
107+ ur_event_status_t status;
108+ do {
109+ ASSERT_SUCCESS (urEventGetInfo (
110+ Events[i].get (), UR_EVENT_INFO_COMMAND_EXECUTION_STATUS,
111+ sizeof (ur_event_status_t ), &status, nullptr ));
112+ } while (status != UR_EVENT_STATUS_COMPLETE);
113+
114+ auto ExpectedValue = InitialValue + i + 1 ;
115+ for (uint32_t j = 0 ; j < ArraySize; ++j) {
116+ ASSERT_EQ (reinterpret_cast <uint32_t *>(SharedMem[i])[j],
117+ ExpectedValue);
118+ }
119+ });
120+ }
121+ for (auto &thread : threads) {
122+ thread.join ();
123+ }
124+ }
102125};
103126
104127template <typename Param>
@@ -189,26 +212,24 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
189212
190213 auto useEvents = std::get<1 >(GetParam ()).value ;
191214
192- std::vector<uur::raii::Event> Events (numOps * 2 - 1 );
193- for (size_t i = 0 ; i < numOps; i++) {
194- size_t waitNum = 0 ;
195- ur_event_handle_t *lastEvent = nullptr ;
196- ur_event_handle_t *kernelEvent = nullptr ;
197- ur_event_handle_t *memcpyEvent = nullptr ;
215+ std::vector<uur::raii::Event> kernelEvents (numOps);
216+ std::vector<uur::raii::Event> memcpyEvents (numOps - 1 );
198217
199- if (useEvents) {
200- // Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ...
201- waitNum = i > 0 ? 1 : 0 ;
202- lastEvent = i > 0 ? Events[i * 2 - 1 ].ptr () : nullptr ;
218+ ur_event_handle_t *lastMemcpyEvent = nullptr ;
219+ ur_event_handle_t *kernelEvent = nullptr ;
220+ ur_event_handle_t *memcpyEvent = nullptr ;
203221
204- kernelEvent = Events[i * 2 ].ptr ();
205- memcpyEvent = i < numOps - 1 ? Events[i * 2 + 1 ].ptr () : nullptr ;
222+ for (size_t i = 0 ; i < numOps; i++) {
223+ if (useEvents) {
224+ lastMemcpyEvent = memcpyEvent;
225+ kernelEvent = kernelEvents[i].ptr ();
226+ memcpyEvent = i < numOps - 1 ? memcpyEvents[i].ptr () : nullptr ;
206227 }
207228
208229 // execute kernel that increments each element by 1
209230 ASSERT_SUCCESS (urEnqueueKernelLaunch (
210231 queue, kernels[i], n_dimensions, &global_offset, &ArraySize,
211- nullptr , waitNum, lastEvent , kernelEvent));
232+ nullptr , bool (lastMemcpyEvent), lastMemcpyEvent , kernelEvent));
212233
213234 // copy the memory (input for the next kernel)
214235 if (i < numOps - 1 ) {
@@ -220,9 +241,9 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
220241 }
221242
222243 if (useEvents) {
223- urEventWait (1 , Events .back ().ptr ());
244+ ASSERT_SUCCESS ( urEventWait (1 , kernelEvents .back ().ptr () ));
224245 } else {
225- urQueueFinish (queue);
246+ ASSERT_SUCCESS ( urQueueFinish (queue) );
226247 }
227248
228249 size_t ExpectedValue = InitialValue;
@@ -237,23 +258,38 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
237258
238259template <typename T>
239260inline std::string
240- printBoolParam (const testing::TestParamInfo<typename T::ParamType> &info) {
261+ printParams (const testing::TestParamInfo<typename T::ParamType> &info) {
241262 std::stringstream ss;
242- ss << (info.param .value ? " " : " No" ) << info.param .name ;
263+
264+ auto param1 = std::get<0 >(info.param );
265+ ss << (param1.value ? " " : " No" ) << param1.name ;
266+
267+ auto param2 = std::get<1 >(info.param );
268+ ss << (param2.value ? " " : " No" ) << param2.name ;
269+
270+ if constexpr (std::tuple_size_v < typename T::ParamType >> 2 ) {
271+ auto param3 = std::get<2 >(info.param );
272+ }
273+
243274 return ss.str ();
244275}
245276
246277using urEnqueueKernelLaunchIncrementMultiDeviceTest =
247- urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<uur::BoolTestParam>;
278+ urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
279+ std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
248280
249281INSTANTIATE_TEST_SUITE_P (
250282 , urEnqueueKernelLaunchIncrementMultiDeviceTest,
251- testing::ValuesIn (uur::BoolTestParam::makeBoolParam(" UseEventWait" )),
252- printBoolParam<urEnqueueKernelLaunchIncrementMultiDeviceTest>);
283+ testing::Combine (
284+ testing::ValuesIn (uur::BoolTestParam::makeBoolParam(" UseEventWait" )),
285+ testing::ValuesIn(
286+ uur::BoolTestParam::makeBoolParam (" RunBackgroundCheck" ))),
287+ printParams<urEnqueueKernelLaunchIncrementMultiDeviceTest>);
253288
254289// Do a chain of kernelLaunch(dev0) -> memcpy(dev0, dev1) -> kernelLaunch(dev1) ... ops
255290TEST_P (urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
256- auto waitOnEvent = GetParam ().value ;
291+ auto waitOnEvent = std::get<0 >(GetParam ()).value ;
292+ auto runBackgroundCheck = std::get<1 >(GetParam ()).value ;
257293
258294 size_t returned_size;
259295 ASSERT_SUCCESS (urDeviceGetInfo (devices[0 ], UR_DEVICE_INFO_EXTENSIONS, 0 ,
@@ -277,20 +313,22 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
277313 constexpr size_t global_offset = 0 ;
278314 constexpr size_t n_dimensions = 1 ;
279315
280- std::vector<uur::raii::Event> Events (devices.size () * 2 - 1 );
316+ std::vector<uur::raii::Event> kernelEvents (devices.size ());
317+ std::vector<uur::raii::Event> memcpyEvents (devices.size () - 1 );
318+
319+ ur_event_handle_t *lastMemcpyEvent = nullptr ;
320+ ur_event_handle_t *kernelEvent = nullptr ;
321+ ur_event_handle_t *memcpyEvent = nullptr ;
322+
281323 for (size_t i = 0 ; i < devices.size (); i++) {
282- // Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ...
283- size_t waitNum = i > 0 ? 1 : 0 ;
284- ur_event_handle_t *lastEvent =
285- i > 0 ? Events[i * 2 - 1 ].ptr () : nullptr ;
286- ur_event_handle_t *kernelEvent = Events[i * 2 ].ptr ();
287- ur_event_handle_t *memcpyEvent =
288- i < devices.size () - 1 ? Events[i * 2 + 1 ].ptr () : nullptr ;
324+ lastMemcpyEvent = memcpyEvent;
325+ kernelEvent = kernelEvents[i].ptr ();
326+ memcpyEvent = i < devices.size () - 1 ? memcpyEvents[i].ptr () : nullptr ;
289327
290328 // execute kernel that increments each element by 1
291329 ASSERT_SUCCESS (urEnqueueKernelLaunch (
292330 queues[i], kernels[i], n_dimensions, &global_offset, &ArraySize,
293- nullptr , waitNum, lastEvent , kernelEvent));
331+ nullptr , bool (lastMemcpyEvent), lastMemcpyEvent , kernelEvent));
294332
295333 // copy the memory to next device
296334 if (i < devices.size () - 1 ) {
@@ -300,12 +338,17 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
300338 }
301339 }
302340
341+ // While the device(s) execute, loop over the events and if completed, verify the results
342+ if (runBackgroundCheck) {
343+ this ->runBackgroundCheck (kernelEvents);
344+ }
345+
303346 // synchronize on the last queue/event only, this has to ensure all the operations
304347 // are completed
305348 if (waitOnEvent) {
306- urEventWait (1 , Events .back ().ptr ());
349+ ASSERT_SUCCESS ( urEventWait (1 , kernelEvents .back ().ptr () ));
307350 } else {
308- urQueueFinish (queues.back ());
351+ ASSERT_SUCCESS ( urQueueFinish (queues.back () ));
309352 }
310353
311354 size_t ExpectedValue = InitialValue;
@@ -318,20 +361,6 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
318361 }
319362}
320363
321- template <typename T>
322- inline std::string
323- printParams (const testing::TestParamInfo<typename T::ParamType> &info) {
324- std::stringstream ss;
325-
326- auto param1 = std::get<0 >(info.param );
327- auto param2 = std::get<1 >(info.param );
328-
329- ss << (param1.value ? " " : " No" ) << param1.name ;
330- ss << (param2.value ? " " : " No" ) << param2.name ;
331-
332- return ss.str ();
333- }
334-
335364using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
336365 urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
337366 std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
@@ -392,9 +421,9 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
392421 lastEvent, signalEvent));
393422
394423 if (useEvents) {
395- urEventWait (1 , Events.back ().ptr ());
424+ ASSERT_SUCCESS ( urEventWait (1 , Events.back ().ptr () ));
396425 } else {
397- urQueueFinish (queue);
426+ ASSERT_SUCCESS ( urQueueFinish (queue) );
398427 }
399428
400429 size_t ExpectedValue = InitialValue;
0 commit comments