Skip to content

Commit ab91195

Browse files
committed
Account for having to use the correct queue per-device.
1 parent 907a67b commit ab91195

File tree

2 files changed

+52
-11
lines changed

2 files changed

+52
-11
lines changed

source/adapters/opencl/usm.cpp

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
#include "common.hpp"
1414
#include "usm.hpp"
1515

16+
template <class T>
1617
void AllocDeleterCallback(cl_event event, cl_int, void *pUserData) {
1718
clReleaseEvent(event);
18-
auto Info = static_cast<AllocDeleterCallbackInfo *>(pUserData);
19+
auto Info = static_cast<T *>(pUserData);
1920
delete Info;
2021
}
2122

@@ -316,7 +317,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
316317
auto Info = new AllocDeleterCallbackInfo(USMFree, CLContext, HostBuffer);
317318

318319
ClErr =
319-
clSetEventCallback(CopyEvent, CL_COMPLETE, AllocDeleterCallback, Info);
320+
clSetEventCallback(CopyEvent, CL_COMPLETE,
321+
AllocDeleterCallback<AllocDeleterCallbackInfo>, Info);
320322
if (ClErr != CL_SUCCESS) {
321323
// We can attempt to recover gracefully by attempting to wait for the copy
322324
// to finish and deleting the info struct here.
@@ -376,6 +378,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
376378
sizeof(cl_device_id), &DstDevice, nullptr));
377379

378380
if ((SrcDevice && DstDevice) && SrcDevice != DstDevice) {
381+
// We need a queue associated with each device, so first figure out which
382+
// one we weren't given.
383+
cl_device_id QueueDevice = nullptr;
384+
CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(
385+
cl_adapter::cast<cl_command_queue>(hQueue), CL_QUEUE_DEVICE,
386+
sizeof(QueueDevice), &QueueDevice, nullptr));
387+
388+
cl_command_queue MissingQueue = nullptr, SrcQueue = nullptr,
389+
DstQueue = nullptr;
390+
if (QueueDevice == SrcDevice) {
391+
MissingQueue = clCreateCommandQueue(CLContext, DstDevice, 0, &CLErr);
392+
SrcQueue = cl_adapter::cast<cl_command_queue>(hQueue);
393+
DstQueue = MissingQueue;
394+
} else {
395+
MissingQueue = clCreateCommandQueue(CLContext, SrcDevice, 0, &CLErr);
396+
DstQueue = cl_adapter::cast<cl_command_queue>(hQueue);
397+
SrcQueue = MissingQueue;
398+
}
399+
CL_RETURN_ON_FAILURE(CLErr);
400+
379401
cl_event HostCopyEvent = nullptr, FinalCopyEvent = nullptr;
380402
clHostMemAllocINTEL_fn HostMemAlloc = nullptr;
381403
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
@@ -402,19 +424,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
402424
};
403425

404426
UR_RETURN_ON_FAILURE(checkCLErr(USMMemcpy(
405-
cl_adapter::cast<cl_command_queue>(hQueue), blocking, HostAlloc, pSrc,
406-
size, numEventsInWaitList,
427+
SrcQueue, blocking, HostAlloc, pSrc, size, numEventsInWaitList,
407428
cl_adapter::cast<const cl_event *>(phEventWaitList), &HostCopyEvent)));
408429

409-
UR_RETURN_ON_FAILURE(checkCLErr(
410-
USMMemcpy(cl_adapter::cast<cl_command_queue>(hQueue), blocking, pDst,
411-
HostAlloc, size, 1, &HostCopyEvent, &FinalCopyEvent)));
430+
UR_RETURN_ON_FAILURE(
431+
checkCLErr(USMMemcpy(DstQueue, blocking, pDst, HostAlloc, size, 1,
432+
&HostCopyEvent, &FinalCopyEvent)));
412433

413434
// If this is a blocking operation we can do our cleanup immediately,
414435
// otherwise we need to defer it to an event callback.
415436
if (blocking) {
416437
CL_RETURN_ON_FAILURE(USMFree(CLContext, HostAlloc));
417438
CL_RETURN_ON_FAILURE(clReleaseEvent(HostCopyEvent));
439+
CL_RETURN_ON_FAILURE(clReleaseCommandQueue(MissingQueue));
418440
if (phEvent) {
419441
*phEvent = cl_adapter::cast<ur_event_handle_t>(FinalCopyEvent);
420442
} else {
@@ -429,11 +451,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
429451
}
430452

431453
// This self destructs, taking the event, the allocation and the temporary queue with it.
432-
auto DeleterInfo =
433-
new AllocDeleterCallbackInfo{USMFree, CLContext, HostAlloc};
454+
auto DeleterInfo = new AllocDeleterCallbackInfoWithQueue(
455+
USMFree, CLContext, HostAlloc, MissingQueue);
434456

435-
CLErr = clSetEventCallback(HostCopyEvent, CL_COMPLETE,
436-
AllocDeleterCallback, DeleterInfo);
457+
CLErr = clSetEventCallback(
458+
HostCopyEvent, CL_COMPLETE,
459+
AllocDeleterCallback<AllocDeleterCallbackInfoWithQueue>, DeleterInfo);
437460

438461
if (CLErr != CL_SUCCESS) {
439462
// We can attempt to recover gracefully by attempting to wait for the

source/adapters/opencl/usm.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,22 @@ struct AllocDeleterCallbackInfo {
3939
void *Allocation;
4040
};
4141

42+
struct AllocDeleterCallbackInfoWithQueue : AllocDeleterCallbackInfo {
43+
AllocDeleterCallbackInfoWithQueue(clMemBlockingFreeINTEL_fn USMFree,
44+
cl_context CLContext, void *Allocation,
45+
cl_command_queue CLQueue)
46+
: AllocDeleterCallbackInfo(USMFree, CLContext, Allocation),
47+
CLQueue(CLQueue) {
48+
clRetainContext(CLContext);
49+
}
50+
~AllocDeleterCallbackInfoWithQueue() { clReleaseCommandQueue(CLQueue); }
51+
AllocDeleterCallbackInfoWithQueue(const AllocDeleterCallbackInfoWithQueue &) =
52+
delete;
53+
AllocDeleterCallbackInfoWithQueue &
54+
operator=(const AllocDeleterCallbackInfoWithQueue &) = delete;
55+
56+
cl_command_queue CLQueue;
57+
};
58+
59+
template <class T>
4260
void AllocDeleterCallback(cl_event event, cl_int, void *pUserData);

0 commit comments

Comments
 (0)