Skip to content

Commit 6feda37

Browse files
aarongreig authored and martygrant committed
Account for having to use the correct queue per-device.
1 parent de821e3 commit 6feda37

File tree

2 files changed

+52
-11
lines changed

2 files changed

+52
-11
lines changed

source/adapters/opencl/usm.cpp

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
#include "common.hpp"
1414
#include "usm.hpp"
1515

16+
// OpenCL event callback (registered via clSetEventCallback): releases `event`,
// then deletes the object behind pUserData after casting it to T, so the
// destructor that runs is T's. The cl_int status argument is unnamed and unused.
template <class T>
1617
void AllocDeleterCallback(cl_event event, cl_int, void *pUserData) {
1718
clReleaseEvent(event);
18-
auto Info = static_cast<AllocDeleterCallbackInfo *>(pUserData);
19+
auto Info = static_cast<T *>(pUserData);
1920
delete Info;
2021
}
2122

@@ -301,7 +302,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
301302
auto Info = new AllocDeleterCallbackInfo(USMFree, CLContext, HostBuffer);
302303

303304
ClErr =
304-
clSetEventCallback(CopyEvent, CL_COMPLETE, AllocDeleterCallback, Info);
305+
clSetEventCallback(CopyEvent, CL_COMPLETE,
306+
AllocDeleterCallback<AllocDeleterCallbackInfo>, Info);
305307
if (ClErr != CL_SUCCESS) {
306308
// We can attempt to recover gracefully by attempting to wait for the copy
307309
// to finish and deleting the info struct here.
@@ -361,6 +363,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
361363
sizeof(cl_device_id), &DstDevice, nullptr));
362364

363365
if ((SrcDevice && DstDevice) && SrcDevice != DstDevice) {
366+
// We need a queue associated with each device, so first figure out which
367+
// one we weren't given.
368+
cl_device_id QueueDevice = nullptr;
369+
CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(
370+
cl_adapter::cast<cl_command_queue>(hQueue), CL_QUEUE_DEVICE,
371+
sizeof(QueueDevice), &QueueDevice, nullptr));
372+
373+
cl_command_queue MissingQueue = nullptr, SrcQueue = nullptr,
374+
DstQueue = nullptr;
375+
if (QueueDevice == SrcDevice) {
376+
MissingQueue = clCreateCommandQueue(CLContext, DstDevice, 0, &CLErr);
377+
SrcQueue = cl_adapter::cast<cl_command_queue>(hQueue);
378+
DstQueue = MissingQueue;
379+
} else {
380+
MissingQueue = clCreateCommandQueue(CLContext, SrcDevice, 0, &CLErr);
381+
DstQueue = cl_adapter::cast<cl_command_queue>(hQueue);
382+
SrcQueue = MissingQueue;
383+
}
384+
CL_RETURN_ON_FAILURE(CLErr);
385+
364386
cl_event HostCopyEvent = nullptr, FinalCopyEvent = nullptr;
365387
clHostMemAllocINTEL_fn HostMemAlloc = nullptr;
366388
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
@@ -387,19 +409,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
387409
};
388410

389411
UR_RETURN_ON_FAILURE(checkCLErr(USMMemcpy(
390-
cl_adapter::cast<cl_command_queue>(hQueue), blocking, HostAlloc, pSrc,
391-
size, numEventsInWaitList,
412+
SrcQueue, blocking, HostAlloc, pSrc, size, numEventsInWaitList,
392413
cl_adapter::cast<const cl_event *>(phEventWaitList), &HostCopyEvent)));
393414

394-
UR_RETURN_ON_FAILURE(checkCLErr(
395-
USMMemcpy(cl_adapter::cast<cl_command_queue>(hQueue), blocking, pDst,
396-
HostAlloc, size, 1, &HostCopyEvent, &FinalCopyEvent)));
415+
UR_RETURN_ON_FAILURE(
416+
checkCLErr(USMMemcpy(DstQueue, blocking, pDst, HostAlloc, size, 1,
417+
&HostCopyEvent, &FinalCopyEvent)));
397418

398419
// If this is a blocking operation we can do our cleanup immediately,
399420
// otherwise we need to defer it to an event callback.
400421
if (blocking) {
401422
CL_RETURN_ON_FAILURE(USMFree(CLContext, HostAlloc));
402423
CL_RETURN_ON_FAILURE(clReleaseEvent(HostCopyEvent));
424+
CL_RETURN_ON_FAILURE(clReleaseCommandQueue(MissingQueue));
403425
if (phEvent) {
404426
*phEvent = cl_adapter::cast<ur_event_handle_t>(FinalCopyEvent);
405427
} else {
@@ -414,11 +436,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
414436
}
415437

416438
// This self destructs taking the event and allocation with it.
417-
auto DeleterInfo =
418-
new AllocDeleterCallbackInfo{USMFree, CLContext, HostAlloc};
439+
auto DeleterInfo = new AllocDeleterCallbackInfoWithQueue(
440+
USMFree, CLContext, HostAlloc, MissingQueue);
419441

420-
CLErr = clSetEventCallback(HostCopyEvent, CL_COMPLETE,
421-
AllocDeleterCallback, DeleterInfo);
442+
CLErr = clSetEventCallback(
443+
HostCopyEvent, CL_COMPLETE,
444+
AllocDeleterCallback<AllocDeleterCallbackInfoWithQueue>, DeleterInfo);
422445

423446
if (CLErr != CL_SUCCESS) {
424447
// We can attempt to recover gracefully by attempting to wait for the

source/adapters/opencl/usm.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,22 @@ struct AllocDeleterCallbackInfo {
3939
void *Allocation;
4040
};
4141

42+
struct AllocDeleterCallbackInfoWithQueue : AllocDeleterCallbackInfo {
43+
AllocDeleterCallbackInfoWithQueue(clMemBlockingFreeINTEL_fn USMFree,
44+
cl_context CLContext, void *Allocation,
45+
cl_command_queue CLQueue)
46+
: AllocDeleterCallbackInfo(USMFree, CLContext, Allocation),
47+
CLQueue(CLQueue) {
48+
clRetainContext(CLContext);
49+
}
50+
~AllocDeleterCallbackInfoWithQueue() { clReleaseCommandQueue(CLQueue); }
51+
AllocDeleterCallbackInfoWithQueue(const AllocDeleterCallbackInfoWithQueue &) =
52+
delete;
53+
AllocDeleterCallbackInfoWithQueue &
54+
operator=(const AllocDeleterCallbackInfoWithQueue &) = delete;
55+
56+
cl_command_queue CLQueue;
57+
};
58+
59+
// Event callback declaration; T is the concrete payload type that pUserData
// points to.
// NOTE(review): the template's definition appears to live in usm.cpp, so it
// can presumably only be instantiated from that translation unit - confirm.
template <class T>
4260
void AllocDeleterCallback(cl_event event, cl_int, void *pUserData);

0 commit comments

Comments
 (0)