1313#include " common.hpp"
1414#include " usm.hpp"
1515
16+ template <class T >
1617void AllocDeleterCallback (cl_event event, cl_int, void *pUserData) {
1718 clReleaseEvent (event);
18- auto Info = static_cast <AllocDeleterCallbackInfo *>(pUserData);
19+ auto Info = static_cast <T *>(pUserData);
1920 delete Info;
2021}
2122
@@ -316,7 +317,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
316317 auto Info = new AllocDeleterCallbackInfo (USMFree, CLContext, HostBuffer);
317318
318319 ClErr =
319- clSetEventCallback (CopyEvent, CL_COMPLETE, AllocDeleterCallback, Info);
320+ clSetEventCallback (CopyEvent, CL_COMPLETE,
321+ AllocDeleterCallback<AllocDeleterCallbackInfo>, Info);
320322 if (ClErr != CL_SUCCESS) {
321323 // We can attempt to recover gracefully by attempting to wait for the copy
322324 // to finish and deleting the info struct here.
@@ -376,6 +378,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
376378 sizeof (cl_device_id), &DstDevice, nullptr ));
377379
378380 if ((SrcDevice && DstDevice) && SrcDevice != DstDevice) {
381+ // We need a queue associated with each device, so first figure out which
382+ // one we weren't given.
383+ cl_device_id QueueDevice = nullptr ;
384+ CL_RETURN_ON_FAILURE (clGetCommandQueueInfo (
385+ cl_adapter::cast<cl_command_queue>(hQueue), CL_QUEUE_DEVICE,
386+ sizeof (QueueDevice), &QueueDevice, nullptr ));
387+
388+ cl_command_queue MissingQueue = nullptr , SrcQueue = nullptr ,
389+ DstQueue = nullptr ;
390+ if (QueueDevice == SrcDevice) {
391+ MissingQueue = clCreateCommandQueue (CLContext, DstDevice, 0 , &CLErr);
392+ SrcQueue = cl_adapter::cast<cl_command_queue>(hQueue);
393+ DstQueue = MissingQueue;
394+ } else {
395+ MissingQueue = clCreateCommandQueue (CLContext, SrcDevice, 0 , &CLErr);
396+ DstQueue = cl_adapter::cast<cl_command_queue>(hQueue);
397+ SrcQueue = MissingQueue;
398+ }
399+ CL_RETURN_ON_FAILURE (CLErr);
400+
379401 cl_event HostCopyEvent = nullptr , FinalCopyEvent = nullptr ;
380402 clHostMemAllocINTEL_fn HostMemAlloc = nullptr ;
381403 UR_RETURN_ON_FAILURE (cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
@@ -402,19 +424,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
402424 };
403425
404426 UR_RETURN_ON_FAILURE (checkCLErr (USMMemcpy (
405- cl_adapter::cast<cl_command_queue>(hQueue), blocking, HostAlloc, pSrc,
406- size, numEventsInWaitList,
427+ SrcQueue, blocking, HostAlloc, pSrc, size, numEventsInWaitList,
407428 cl_adapter::cast<const cl_event *>(phEventWaitList), &HostCopyEvent)));
408429
409- UR_RETURN_ON_FAILURE (checkCLErr (
410- USMMemcpy (cl_adapter::cast<cl_command_queue>(hQueue) , blocking, pDst,
411- HostAlloc, size, 1 , &HostCopyEvent, &FinalCopyEvent)));
430+ UR_RETURN_ON_FAILURE (
431+ checkCLErr ( USMMemcpy (DstQueue , blocking, pDst, HostAlloc, size, 1 ,
432+ &HostCopyEvent, &FinalCopyEvent)));
412433
413434 // If this is a blocking operation we can do our cleanup immediately,
414435 // otherwise we need to defer it to an event callback.
415436 if (blocking) {
416437 CL_RETURN_ON_FAILURE (USMFree (CLContext, HostAlloc));
417438 CL_RETURN_ON_FAILURE (clReleaseEvent (HostCopyEvent));
439+ CL_RETURN_ON_FAILURE (clReleaseCommandQueue (MissingQueue));
418440 if (phEvent) {
419441 *phEvent = cl_adapter::cast<ur_event_handle_t >(FinalCopyEvent);
420442 } else {
@@ -429,11 +451,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
429451 }
430452
431453 // This self destructs taking the event and allocation with it.
432- auto DeleterInfo =
433- new AllocDeleterCallbackInfo{ USMFree, CLContext, HostAlloc} ;
454+ auto DeleterInfo = new AllocDeleterCallbackInfoWithQueue (
455+ USMFree, CLContext, HostAlloc, MissingQueue) ;
434456
435- CLErr = clSetEventCallback (HostCopyEvent, CL_COMPLETE,
436- AllocDeleterCallback, DeleterInfo);
457+ CLErr = clSetEventCallback (
458+ HostCopyEvent, CL_COMPLETE,
459+ AllocDeleterCallback<AllocDeleterCallbackInfoWithQueue>, DeleterInfo);
437460
438461 if (CLErr != CL_SUCCESS) {
439462 // We can attempt to recover gracefully by attempting to wait for the
0 commit comments