Skip to content

Commit d6bbe48

Browse files
Handle SVM allocations from multi root device contexts
Related-To: NEO-5001, NEO-3691
Signed-off-by: Mateusz Jablonski <[email protected]>
1 parent 56b2686 commit d6bbe48

File tree

7 files changed

+144
-48
lines changed

7 files changed

+144
-48
lines changed

opencl/source/api/api.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4828,7 +4828,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
48284828
}
48294829
}
48304830

4831-
GraphicsAllocation *pSvmAlloc = nullptr;
4831+
MultiGraphicsAllocation *pSvmAllocs = nullptr;
48324832
if (argValue != nullptr) {
48334833
auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
48344834
auto svmData = svmManager->getSVMAlloc(argValue);
@@ -4841,11 +4841,11 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
48414841
}
48424842
}
48434843
} else {
4844-
pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pMultiDeviceKernel->getDevices()[0]->getRootDeviceIndex());
4844+
pSvmAllocs = &svmData->gpuAllocations;
48454845
}
48464846
}
48474847

4848-
retVal = pMultiDeviceKernel->setArgSvmAlloc(argIndex, const_cast<void *>(argValue), pSvmAlloc);
4848+
retVal = pMultiDeviceKernel->setArgSvmAlloc(argIndex, const_cast<void *>(argValue), pSvmAllocs);
48494849
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
48504850
return retVal;
48514851
}
@@ -4916,12 +4916,12 @@ cl_int CL_API_CALL clSetKernelExecInfo(cl_kernel kernel,
49164916
TRACING_EXIT(clSetKernelExecInfo, &retVal);
49174917
return retVal;
49184918
}
4919-
GraphicsAllocation *svmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pMultiDeviceKernel->getDevices()[0]->getRootDeviceIndex());
4919+
auto &svmAllocs = svmData->gpuAllocations;
49204920

49214921
if (paramName == CL_KERNEL_EXEC_INFO_SVM_PTRS) {
4922-
pMultiDeviceKernel->setSvmKernelExecInfo(svmAlloc);
4922+
pMultiDeviceKernel->setSvmKernelExecInfo(svmAllocs);
49234923
} else {
4924-
pMultiDeviceKernel->setUnifiedMemoryExecInfo(svmAlloc);
4924+
pMultiDeviceKernel->setUnifiedMemoryExecInfo(svmAllocs);
49254925
}
49264926
}
49274927
break;

opencl/source/kernel/kernel.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -467,9 +467,12 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) {
467467
}
468468

469469
// copy additional information other than argument values set to source kernel with clSetKernelExecInfo
470-
for (auto gfxAlloc : pSourceKernel->kernelSvmGfxAllocations) {
470+
for (auto &gfxAlloc : pSourceKernel->kernelSvmGfxAllocations) {
471471
kernelSvmGfxAllocations.push_back(gfxAlloc);
472472
}
473+
for (auto &gfxAlloc : pSourceKernel->kernelUnifiedMemoryGfxAllocations) {
474+
kernelUnifiedMemoryGfxAllocations.push_back(gfxAlloc);
475+
}
473476

474477
this->isBuiltIn = pSourceKernel->isBuiltIn;
475478

opencl/source/kernel/multi_device_kernel.cpp

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,8 @@ bool MultiDeviceKernel::getHasIndirectAccess() const { return defaultKernel->get
4848
// One-line MultiDeviceKernel entry points. Each hands its arguments to the
// Kernel member function of the same purpose through callOnEachKernel (void
// wrappers) or getResultFromEachKernel (wrappers returning cl_int / int).
// Both helpers are defined outside this view; presumably they invoke the given
// member on every per-device kernel - verify against their definitions.
// In this diff view the three wrappers taking a single GraphicsAllocation *
// carry "-" markers (removed by the commit); the MultiGraphicsAllocation
// versions that replace them appear further down in the same file.
cl_int MultiDeviceKernel::checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return getResultFromEachKernel(&Kernel::checkCorrectImageAccessQualifier, argIndex, argSize, argValue); }
4949
void MultiDeviceKernel::unsetArg(uint32_t argIndex) { callOnEachKernel(&Kernel::unsetArg, argIndex); }
5050
cl_int MultiDeviceKernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return getResultFromEachKernel(&Kernel::setArgument, argIndex, argSize, argVal); }
51-
cl_int MultiDeviceKernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return getResultFromEachKernel(&Kernel::setArgSvmAlloc, argIndex, svmPtr, svmAlloc); }
5251
void MultiDeviceKernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { callOnEachKernel(&Kernel::setUnifiedMemoryProperty, infoType, infoValue); }
53-
void MultiDeviceKernel::setSvmKernelExecInfo(GraphicsAllocation *argValue) { callOnEachKernel(&Kernel::setSvmKernelExecInfo, argValue); }
5452
void MultiDeviceKernel::clearSvmKernelExecInfo() { callOnEachKernel(&Kernel::clearSvmKernelExecInfo); }
55-
void MultiDeviceKernel::setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { callOnEachKernel(&Kernel::setUnifiedMemoryExecInfo, argValue); }
5653
void MultiDeviceKernel::clearUnifiedMemoryExecInfo() { callOnEachKernel(&Kernel::clearUnifiedMemoryExecInfo); }
5754
int MultiDeviceKernel::setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return getResultFromEachKernel(&Kernel::setKernelThreadArbitrationPolicy, propertyValue); }
5855
cl_int MultiDeviceKernel::setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return getResultFromEachKernel(&Kernel::setKernelExecutionType, executionType); }
@@ -68,4 +65,30 @@ cl_int MultiDeviceKernel::cloneKernel(MultiDeviceKernel *pSourceMultiDeviceKerne
6865
}
6966
return CL_SUCCESS;
7067
}
68+
// Sets the SVM pointer argument `argIndex` on each kernel owned by this object.
// Walks indices [0, kernels.size()), using each index as a root device index;
// indices for which getKernel() yields null are skipped.
//   argIndex  - kernel argument slot, forwarded unchanged.
//   svmPtr    - SVM pointer value, forwarded unchanged to every kernel.
//   svmAllocs - optional. When null every kernel receives a null allocation;
//               otherwise each kernel receives the allocation looked up for
//               its own root device index.
// Returns CL_SUCCESS unconditionally.
// NOTE(review): whatever Kernel::setArgSvmAlloc returns is discarded here, so a
// per-kernel failure would not reach the caller - confirm this is intended.
cl_int MultiDeviceKernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, MultiGraphicsAllocation *svmAllocs) {
69+
for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) {
70+
auto pKernel = getKernel(rootDeviceIndex);
71+
if (pKernel) {
72+
// A null svmAllocs means "no backing allocation" for every device.
auto svmAlloc = svmAllocs ? svmAllocs->getGraphicsAllocation(rootDeviceIndex) : nullptr;
73+
pKernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc);
74+
}
75+
}
76+
return CL_SUCCESS;
77+
}
78+
// Passes an SVM allocation to Kernel::setSvmKernelExecInfo on each kernel
// owned by this object, selecting from `argValue` the allocation that belongs
// to that kernel's root device index.
// Walks indices [0, kernels.size()); indices for which getKernel() yields null
// are skipped.
// NOTE(review): the result of argValue.getGraphicsAllocation() is forwarded
// without a null check - confirm argValue always holds an allocation for every
// index that has a kernel.
void MultiDeviceKernel::setSvmKernelExecInfo(const MultiGraphicsAllocation &argValue) {
79+
for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) {
80+
auto pKernel = getKernel(rootDeviceIndex);
81+
if (pKernel) {
82+
pKernel->setSvmKernelExecInfo(argValue.getGraphicsAllocation(rootDeviceIndex));
83+
}
84+
}
85+
}
86+
// Passes a unified-memory allocation to Kernel::setUnifiedMemoryExecInfo on
// each kernel owned by this object, selecting from `argValue` the allocation
// that belongs to that kernel's root device index.
// Walks indices [0, kernels.size()); indices for which getKernel() yields null
// are skipped.
// NOTE(review): the result of argValue.getGraphicsAllocation() is forwarded
// without a null check - confirm argValue always holds an allocation for every
// index that has a kernel.
void MultiDeviceKernel::setUnifiedMemoryExecInfo(const MultiGraphicsAllocation &argValue) {
87+
for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) {
88+
auto pKernel = getKernel(rootDeviceIndex);
89+
if (pKernel) {
90+
pKernel->setUnifiedMemoryExecInfo(argValue.getGraphicsAllocation(rootDeviceIndex));
91+
}
92+
}
93+
}
7194
} // namespace NEO

opencl/source/kernel/multi_device_kernel.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,12 +54,12 @@ class MultiDeviceKernel : public BaseObject<_cl_kernel> {
5454
const ClDeviceVector &getDevices() const;
5555
size_t getKernelArgsNumber() const;
5656
Context &getContext() const;
57-
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc);
57+
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, MultiGraphicsAllocation *svmAllocs);
5858
bool getHasIndirectAccess() const;
5959
void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue);
60-
void setSvmKernelExecInfo(GraphicsAllocation *argValue);
60+
void setSvmKernelExecInfo(const MultiGraphicsAllocation &argValue);
6161
void clearSvmKernelExecInfo();
62-
void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue);
62+
void setUnifiedMemoryExecInfo(const MultiGraphicsAllocation &argValue);
6363
void clearUnifiedMemoryExecInfo();
6464
int setKernelThreadArbitrationPolicy(uint32_t propertyValue);
6565
cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType);

opencl/test/unit_test/kernel/clone_kernel_tests.cpp

Lines changed: 60 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -494,35 +494,42 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
494494
}
495495

496496
TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect) {
497-
char *svmPtr = new char[256];
498-
MockGraphicsAllocation svmAlloc(svmPtr, 256);
497+
char memory[100] = {};
498+
MultiGraphicsAllocation multiGraphicsAllocation(3);
499+
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
500+
auto svmAlloc = new MockGraphicsAllocation(rootDeviceIndex, memory, 100);
501+
multiGraphicsAllocation.addAllocation(svmAlloc);
502+
}
499503

500-
auto rootDeviceIndex = *context->getRootDeviceIndices().begin();
501-
retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
504+
retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation);
502505
ASSERT_EQ(CL_SUCCESS, retVal);
503506

504-
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
505-
EXPECT_EQ(Kernel::SVM_ALLOC_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
506-
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
507-
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
508-
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
507+
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
508+
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size());
509+
EXPECT_EQ(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
510+
EXPECT_EQ(Kernel::SVM_ALLOC_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
511+
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
512+
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
513+
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
514+
}
509515

510516
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
511517
EXPECT_EQ(CL_SUCCESS, retVal);
512518

513-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
514-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
515-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
516-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
517-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
518-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
519-
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
520-
521-
auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
522-
pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
523-
EXPECT_EQ(svmPtr, *pKernelArg);
519+
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
520+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size());
521+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type);
522+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object);
523+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value);
524+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
525+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
526+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
524527

525-
delete[] svmPtr;
528+
auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
529+
pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
530+
EXPECT_EQ(memory, *pKernelArg);
531+
delete multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
532+
}
526533
}
527534

528535
TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrect) {
@@ -565,13 +572,14 @@ TEST_F(CloneKernelTest, GivenExecInfoWhenCloningKernelThenSvmAllocationIsCorrect
565572

566573
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
567574
ASSERT_NE(nullptr, svmData);
568-
GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(device1->getRootDeviceIndex());
569-
ASSERT_NE(nullptr, pSvmAlloc);
575+
auto &pSvmAllocs = svmData->gpuAllocations;
570576

571-
pSourceMultiDeviceKernel->setSvmKernelExecInfo(pSvmAlloc);
577+
pSourceMultiDeviceKernel->setSvmKernelExecInfo(pSvmAllocs);
572578

573579
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
574580
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size());
581+
EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
582+
EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0));
575583
}
576584

577585
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@@ -585,6 +593,34 @@ TEST_F(CloneKernelTest, GivenExecInfoWhenCloningKernelThenSvmAllocationIsCorrect
585593
context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
586594
}
587595

596+
// Checks that allocations registered through setUnifiedMemoryExecInfo are
// carried over by cloneKernel: after cloning, every per-root-device cloned
// kernel must list the same unified-memory allocation as its source kernel.
// The test is skipped when REQUIRE_SVM_OR_SKIP decides so for device1.
TEST_F(CloneKernelTest, GivenUnifiedMemoryExecInfoWhenCloningKernelThenUnifiedMemoryAllocationIsCorrect) {
597+
REQUIRE_SVM_OR_SKIP(device1);
598+
// Given: one SVM allocation created for all root devices of the context.
void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
599+
ASSERT_NE(nullptr, ptrSVM);
600+
601+
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
602+
ASSERT_NE(nullptr, svmData);
603+
auto &pSvmAllocs = svmData->gpuAllocations;
604+
605+
pSourceMultiDeviceKernel->setUnifiedMemoryExecInfo(pSvmAllocs);
606+
607+
// Each source kernel must hold exactly one entry: the allocation for its own
// root device index.
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
608+
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size());
609+
EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
610+
EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
611+
}
612+
613+
// When: the source kernel is cloned.
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
614+
EXPECT_EQ(CL_SUCCESS, retVal);
615+
616+
// Then: the clone's list matches the source's list in size and first entry.
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
617+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size());
618+
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0));
619+
}
620+
621+
context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
622+
}
623+
588624
TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) {
589625
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
590626
pSourceKernel[rootDeviceIndex]->isBuiltIn = true;

opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include "opencl/source/api/api.h"
1717
#include "opencl/source/mem_obj/mem_obj_helper.h"
1818
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
19+
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
1920
#include "opencl/test/unit_test/mocks/mock_buffer.h"
2021
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
2122
#include "opencl/test/unit_test/mocks/mock_context.h"
@@ -72,6 +73,29 @@ TEST_F(SVMMemoryAllocatorTest, whenRequestSVMAllocsThenReturnNonNullptr) {
7273
EXPECT_NE(svmAllocs, nullptr);
7374
}
7475

76+
using MultiDeviceSVMMemoryAllocatorTest = MultiRootDeviceWithSubDevicesFixture;
77+
78+
// Checks that a single createSVMAlloc call made with all of the context's root
// device indices registers one SVM entry that holds a non-null, non-coherent
// SVM_ZERO_COPY graphics allocation for every root device index, and that
// freeSVMAlloc removes that entry again.
// The test is skipped when REQUIRE_SVM_OR_SKIP decides so for device1.
TEST_F(MultiDeviceSVMMemoryAllocatorTest, givenMultipleDevicesWhenCreatingSVMAllocThenCreateOneGraphicsAllocationPerRootDeviceIndex) {
79+
REQUIRE_SVM_OR_SKIP(device1);
80+
auto svmManager = std::make_unique<MockSVMAllocsManager>(device1->getMemoryManager(), false);
81+
82+
auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
83+
EXPECT_NE(nullptr, ptr);
84+
auto svmData = svmManager->getSVMAlloc(ptr);
85+
// One pointer maps to one tracked entry, regardless of the device count.
EXPECT_EQ(1u, svmManager->SVMAllocs.getNumAllocs());
86+
ASSERT_NE(nullptr, svmData);
87+
for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
88+
// NOTE(review): this looks the entry up again instead of reusing svmData.
auto svmAllocation = svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
89+
// NOTE(review): EXPECT_NE is non-fatal, yet svmAllocation is dereferenced on
// the next lines; ASSERT_NE would stop the test cleanly on a null allocation.
EXPECT_NE(nullptr, svmAllocation);
90+
EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_ZERO_COPY, svmAllocation->getAllocationType());
91+
EXPECT_FALSE(svmAllocation->isCoherent());
92+
}
93+
94+
// After freeing, the pointer must no longer resolve and the table is empty.
svmManager->freeSVMAlloc(ptr);
95+
EXPECT_EQ(nullptr, svmManager->getSVMAlloc(ptr));
96+
EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());
97+
}
98+
7599
TEST_F(SVMMemoryAllocatorTest, whenSVMAllocationIsFreedThenCannotBeGotAgain) {
76100
auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields);
77101
EXPECT_NE(nullptr, ptr);

shared/source/memory_manager/unified_memory_manager.cpp

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,9 @@ void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationPropertie
112112
if (size == 0)
113113
return nullptr;
114114

115+
if (rootDeviceIndices.size() > 1) {
116+
return createZeroCopySvmAllocation(size, svmProperties, rootDeviceIndices, subdeviceBitfields);
117+
}
115118
if (!memoryManager->isLocalMemorySupported(*rootDeviceIndices.begin())) {
116119
return createZeroCopySvmAllocation(size, svmProperties, rootDeviceIndices, subdeviceBitfields);
117120
} else {
@@ -349,20 +352,26 @@ void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAlloca
349352
false, // isMultiStorageAllocation
350353
deviceBitfield};
351354
MemoryPropertiesHelper::fillCachePolicyInProperties(properties, false, svmProperties.readOnly, false, properties.cacheRegion);
352-
GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
353-
if (!allocation) {
355+
356+
std::vector<uint32_t> rootDeviceIndicesVector(rootDeviceIndices.begin(), rootDeviceIndices.end());
357+
358+
auto maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less<uint32_t const>());
359+
SvmAllocationData allocData(maxRootDeviceIndex);
360+
361+
void *usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, properties, allocData.gpuAllocations);
362+
if (!usmPtr) {
354363
return nullptr;
355364
}
356-
allocation->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
357-
allocation->setCoherent(svmProperties.coherent);
358-
359-
SvmAllocationData allocData(rootDeviceIndex);
360-
allocData.gpuAllocations.addAllocation(allocation);
365+
for (const auto &rootDeviceIndex : rootDeviceIndices) {
366+
auto allocation = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
367+
allocation->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
368+
allocation->setCoherent(svmProperties.coherent);
369+
}
361370
allocData.size = size;
362371

363372
std::unique_lock<SpinLock> lock(mtx);
364373
this->SVMAllocs.insert(allocData);
365-
return allocation->getUnderlyingBuffer();
374+
return usmPtr;
366375
}
367376

368377
void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
@@ -415,10 +424,11 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co
415424
}
416425

417426
void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) {
418-
GraphicsAllocation *gpuAllocation = svmData->gpuAllocations.getDefaultGraphicsAllocation();
427+
auto gpuAllocations = svmData->gpuAllocations;
419428
SVMAllocs.remove(*svmData);
420-
421-
memoryManager->freeGraphicsMemory(gpuAllocation);
429+
for (const auto &graphicsAllocation : gpuAllocations.getGraphicsAllocations()) {
430+
memoryManager->freeGraphicsMemory(graphicsAllocation);
431+
}
422432
}
423433

424434
void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) {

0 commit comments

Comments
 (0)