Skip to content

Commit fa3d4f3

Browse files
Enabling clEnqueueSVMMemcpy between SVM and host pointer
Related-To: NEO-3011
Change-Id: I89aad599d7238ea2d319a4b1c72dffea2dba952b
Signed-off-by: Koska, Andrzej <[email protected]>
1 parent c967327 commit fa3d4f3

File tree

3 files changed

+272
-23
lines changed

3 files changed

+272
-23
lines changed

runtime/command_queue/enqueue_svm.h

Lines changed: 94 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMFree(cl_uint numSvmPointers,
264264
return CL_SUCCESS;
265265
}
266266

267+
// Populates `operationParams` for a one-dimensional copy of `size` bytes.
// Only the first component of size / srcOffset / dstOffset is set; the remaining
// two components are written as zero. The const qualifier on srcPtr is cast away
// before the pointer is stored.
inline void setOperationParams(BuiltinDispatchInfoBuilder::BuiltinOpParams &operationParams, size_t size,
                               const void *srcPtr, GraphicsAllocation *srcSvmAlloc, size_t srcPtrOffset,
                               void *dstPtr, GraphicsAllocation *dstSvmAlloc, size_t dstPtrOffset) {
    // Source side of the copy.
    operationParams.srcSvmAlloc = srcSvmAlloc;
    operationParams.srcPtr = const_cast<void *>(srcPtr);
    operationParams.srcOffset = {srcPtrOffset, 0, 0};

    // Destination side of the copy.
    operationParams.dstSvmAlloc = dstSvmAlloc;
    operationParams.dstPtr = dstPtr;
    operationParams.dstOffset = {dstPtrOffset, 0, 0};

    // Extent of the copy in bytes.
    operationParams.size = {size, 0, 0};
}
278+
267279
template <typename GfxFamily>
268280
cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
269281
void *dstPtr,
@@ -273,40 +285,100 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
273285
const cl_event *eventWaitList,
274286
cl_event *event) {
275287

288+
if ((dstPtr == nullptr) || (srcPtr == nullptr)) {
289+
return CL_INVALID_VALUE;
290+
}
276291
auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr);
277292
auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr);
278-
if ((dstSvmData == nullptr) || (srcSvmData == nullptr)) {
293+
294+
enum CopyType { InvalidCopyType,
295+
SvmToHost,
296+
HostToSvm,
297+
SvmToSvm };
298+
CopyType copyType = InvalidCopyType;
299+
if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) {
300+
copyType = SvmToSvm;
301+
} else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) {
302+
copyType = HostToSvm;
303+
} else if (srcSvmData != nullptr) {
304+
copyType = SvmToHost;
305+
} else {
279306
return CL_INVALID_VALUE;
280307
}
281308

282309
MultiDispatchInfo dispatchInfo;
283-
284310
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
285311
this->getContext(), this->getDevice());
286-
287312
BuiltInOwnershipWrapper builtInLock(builder, this->context);
288-
289313
BuiltinDispatchInfoBuilder::BuiltinOpParams operationParams;
290-
operationParams.srcPtr = const_cast<void *>(srcPtr);
291-
operationParams.dstPtr = dstPtr;
292-
operationParams.srcSvmAlloc = srcSvmData->gpuAllocation;
293-
operationParams.dstSvmAlloc = dstSvmData->gpuAllocation;
294-
operationParams.srcOffset = {0, 0, 0};
295-
operationParams.dstOffset = {0, 0, 0};
296-
operationParams.size = {size, 0, 0};
297-
builder.buildDispatchInfos(dispatchInfo, operationParams);
298314

299-
GeneralSurface s1(srcSvmData->gpuAllocation), s2(dstSvmData->gpuAllocation);
300-
Surface *surfaces[] = {&s1, &s2};
301-
302-
enqueueHandler<CL_COMMAND_SVM_MEMCPY>(
303-
surfaces,
304-
blockingCopy ? true : false,
305-
dispatchInfo,
306-
numEventsInWaitList,
307-
eventWaitList,
308-
event);
315+
Surface *surfaces[2];
316+
if (copyType == SvmToHost) {
317+
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation);
318+
HostPtrSurface dstHostPtrSurf(dstPtr, size);
319+
if (size != 0) {
320+
bool status = getCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
321+
if (!status) {
322+
return CL_OUT_OF_RESOURCES;
323+
}
324+
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
325+
}
309326

327+
void *alignedDstPtr = alignDown(dstPtr, 4);
328+
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
329+
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, alignedDstPtr, nullptr, dstPtrOffset);
330+
surfaces[0] = &srcSvmSurf;
331+
surfaces[1] = &dstHostPtrSurf;
332+
builder.buildDispatchInfos(dispatchInfo, operationParams);
333+
enqueueHandler<CL_COMMAND_READ_BUFFER>(
334+
surfaces,
335+
blockingCopy == CL_TRUE,
336+
dispatchInfo,
337+
numEventsInWaitList,
338+
eventWaitList,
339+
event);
340+
} else if (copyType == HostToSvm) {
341+
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
342+
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation);
343+
if (size != 0) {
344+
bool status = getCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
345+
if (!status) {
346+
return CL_OUT_OF_RESOURCES;
347+
}
348+
srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
349+
}
350+
void *alignedSrcPtr = alignDown(const_cast<void *>(srcPtr), 4);
351+
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
352+
setOperationParams(operationParams, size, alignedSrcPtr, nullptr, srcPtrOffset, dstPtr, dstSvmData->gpuAllocation, 0);
353+
surfaces[0] = &dstSvmSurf;
354+
surfaces[1] = &srcHostPtrSurf;
355+
builder.buildDispatchInfos(dispatchInfo, operationParams);
356+
enqueueHandler<CL_COMMAND_WRITE_BUFFER>(
357+
surfaces,
358+
blockingCopy == CL_TRUE,
359+
dispatchInfo,
360+
numEventsInWaitList,
361+
eventWaitList,
362+
event);
363+
} else {
364+
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation);
365+
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation);
366+
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, dstPtr, dstSvmData->gpuAllocation, 0);
367+
surfaces[0] = &srcSvmSurf;
368+
surfaces[1] = &dstSvmSurf;
369+
builder.buildDispatchInfos(dispatchInfo, operationParams);
370+
enqueueHandler<CL_COMMAND_SVM_MEMCPY>(
371+
surfaces,
372+
blockingCopy ? true : false,
373+
dispatchInfo,
374+
numEventsInWaitList,
375+
eventWaitList,
376+
event);
377+
}
378+
if (event) {
379+
auto pEvent = castToObjectOrAbort<Event>(*event);
380+
pEvent->setCmdType(CL_COMMAND_SVM_MEMCPY);
381+
}
310382
return CL_SUCCESS;
311383
}
312384

runtime/command_stream/command_stream_receiver.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ class CommandStreamReceiver {
154154
AllocationsList &getTemporaryAllocations();
155155
AllocationsList &getAllocationsForReuse();
156156
InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); }
157-
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush);
157+
MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush);
158158
virtual size_t getPreferredTagPoolSize() const { return 512; }
159159
virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; }
160160
OsContext &getOsContext() const { return *osContext; }

unit_tests/command_queue/enqueue_svm_tests.cpp

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,133 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueSrcPtrIsNull) {
278278
EXPECT_EQ(CL_INVALID_VALUE, retVal);
279279
}
280280

281+
// Host-to-SVM copy with an output event: the event returned to the caller must
// report CL_COMMAND_SVM_MEMCPY as its command type.
TEST_F(EnqueueSvmTest, GivenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char srcHostPtr[260];
    void *pDst = ptrSVM;     // SVM allocation provided by the fixture
    void *pSrc = srcHostPtr; // plain host memory, not an SVM allocation
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDst,    // void *dst_ptr
        pSrc,    // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        &event   // cl_event *event
    );
    // Fatal assertions: castToObjectOrAbort is expected to abort the whole test
    // binary on a null handle, so stop this test first if no event was produced.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}
301+
302+
// A zero-byte copy from plain host memory into the fixture's SVM allocation
// is expected to succeed.
TEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char hostSource[260];
    void *destination = ptrSVM;
    void *source = hostSource;

    retVal = this->pCmdQ->enqueueSVMMemcpy(false,       // cl_bool blocking_copy
                                           destination, // void *dst_ptr
                                           source,      // const void *src_ptr
                                           0,           // size_t size
                                           0,           // cl_uint num_events_in_wait_list
                                           nullptr,     // const cl_event *event_wait_list
                                           nullptr);    // cl_event *event

    EXPECT_EQ(CL_SUCCESS, retVal);
}
317+
318+
// Host-to-SVM copy: the mock queue must record CL_COMMAND_WRITE_BUFFER as the
// last command type it handled.
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) {
    char srcHostPtr[260];
    void *pSrc = srcHostPtr; // plain host memory, not an SVM allocation
    void *pDst = ptrSVM;     // SVM allocation provided by the fixture
    // nullptr (not literal 0) for the third argument, matching the other tests in this file.
    MockCommandQueueHw<FamilyType> myCmdQ(context, pDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDst,    // void *dst_ptr
        pSrc,    // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Expected value first, as in the other assertions of this file.
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER), myCmdQ.lastCommandType);
}
335+
336+
// SVM-to-host copy: the mock queue must record CL_COMMAND_READ_BUFFER as the
// last command type it handled.
HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) {
    char dstHostPtr[260];
    void *pDst = dstHostPtr; // plain host memory, not an SVM allocation
    void *pSrc = ptrSVM;     // SVM allocation provided by the fixture
    // nullptr (not literal 0) for the third argument, matching the other tests in this file.
    MockCommandQueueHw<FamilyType> myCmdQ(context, pDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDst,    // void *dst_ptr
        pSrc,    // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Expected value first, as in the other assertions of this file.
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER), myCmdQ.lastCommandType);
}
353+
354+
// SVM-to-host copy with an output event: the event returned to the caller must
// report CL_COMMAND_SVM_MEMCPY as its command type.
TEST_F(EnqueueSvmTest, GivenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char dstHostPtr[260];
    void *pDst = dstHostPtr; // plain host memory, not an SVM allocation
    void *pSrc = ptrSVM;     // SVM allocation provided by the fixture
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDst,    // void *dst_ptr
        pSrc,    // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        &event   // cl_event *event
    );
    // Fatal assertions: castToObjectOrAbort is expected to abort the whole test
    // binary on a null handle, so stop this test first if no event was produced.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}
374+
375+
// A zero-byte copy from the fixture's SVM allocation into plain host memory
// is expected to succeed.
TEST_F(EnqueueSvmTest, GivenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char hostDestination[260];
    void *destination = hostDestination;
    void *source = ptrSVM;

    retVal = this->pCmdQ->enqueueSVMMemcpy(false,       // cl_bool blocking_copy
                                           destination, // void *dst_ptr
                                           source,      // const void *src_ptr
                                           0,           // size_t size
                                           0,           // cl_uint num_events_in_wait_list
                                           nullptr,     // const cl_event *event_wait_list
                                           nullptr);    // cl_event *event

    EXPECT_EQ(CL_SUCCESS, retVal);
}
390+
391+
// Both pointers refer to stack arrays rather than SVM allocations, so the
// enqueue is expected to be rejected with CL_INVALID_VALUE.
TEST_F(EnqueueSvmTest, GivenDstHostPtrAndSrcHostPtrWhenEnqueueSVMMemcpyThenReturnInvalidValue) {
    char hostDestination[260];
    char hostSource[260];
    void *destination = hostDestination;
    void *source = hostSource;

    retVal = this->pCmdQ->enqueueSVMMemcpy(false,       // cl_bool blocking_copy
                                           destination, // void *dst_ptr
                                           source,      // const void *src_ptr
                                           256,         // size_t size
                                           0,           // cl_uint num_events_in_wait_list
                                           nullptr,     // const cl_event *event_wait_list
                                           nullptr);    // cl_event *event

    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
407+
281408
TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) {
282409
void *pDstSVM = ptrSVM;
283410
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {});
@@ -915,3 +1042,53 @@ HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAn
9151042
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
9161043
EXPECT_EQ(2u, walkerCount);
9171044
}
1045+
1046+
template <typename GfxFamily>
1047+
struct FailCsr : public CommandStreamReceiverHw<GfxFamily> {
1048+
FailCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw<GfxFamily>(executionEnvironment){};
1049+
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
1050+
return CL_FALSE;
1051+
}
1052+
};
1053+
1054+
// SVM-to-host copy with a command stream receiver that refuses to create the
// host-pointer allocation: the enqueue must report CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char hostDestination[260];
    void *destination = hostDestination;
    void *source = ptrSVM;

    MockCommandQueueHw<FamilyType> cmdQ(context, pDevice, nullptr);
    auto failingCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment());

    // Swap in the failing receiver for the duration of the call; restored below.
    CommandStreamReceiver *originalCsr = cmdQ.engine->commandStreamReceiver;
    cmdQ.engine->commandStreamReceiver = failingCsr.get();

    retVal = cmdQ.enqueueSVMMemcpy(false,       // cl_bool blocking_copy
                                   destination, // void *dst_ptr
                                   source,      // const void *src_ptr
                                   256,         // size_t size
                                   0,           // cl_uint num_events_in_wait_list
                                   nullptr,     // const cl_event *event_wait_list
                                   nullptr);    // cl_event *event

    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
    cmdQ.engine->commandStreamReceiver = originalCsr;
}
1074+
1075+
// Host-to-SVM copy with a command stream receiver that refuses to create the
// host-pointer allocation: the enqueue must report CL_OUT_OF_RESOURCES.
// NOTE(review): the test name says "SizeZero" but the copy size passed below is 256;
// the name looks like a copy/paste leftover - consider renaming the test.
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char hostSource[260];
    void *destination = ptrSVM;
    void *source = hostSource;

    MockCommandQueueHw<FamilyType> cmdQ(context, pDevice, nullptr);
    auto failingCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment());

    // Swap in the failing receiver for the duration of the call; restored below.
    CommandStreamReceiver *originalCsr = cmdQ.engine->commandStreamReceiver;
    cmdQ.engine->commandStreamReceiver = failingCsr.get();

    retVal = cmdQ.enqueueSVMMemcpy(false,       // cl_bool blocking_copy
                                   destination, // void *dst_ptr
                                   source,      // const void *src_ptr
                                   256,         // size_t size
                                   0,           // cl_uint num_events_in_wait_list
                                   nullptr,     // const cl_event *event_wait_list
                                   nullptr);    // cl_event *event

    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
    cmdQ.engine->commandStreamReceiver = originalCsr;
}

0 commit comments

Comments
 (0)