Skip to content

Commit c0b7f05

Browse files
Add memory prefetch for kmd migrated shared allocations
This feature is disabled by default and is controlled with the knob AppendMemoryPrefetchForKmdMigratedSharedAllocations.

Related-To: NEO-6740
Signed-off-by: Milczarek, Slawomir <[email protected]>
1 parent 10e7b9d commit c0b7f05

File tree

16 files changed

+244
-0
lines changed

16 files changed

+244
-0
lines changed

level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const
3131
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
3232
}
3333

34+
auto allowPrefetchingKmdMigratedSharedAllocation = false;
35+
if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() != -1) {
36+
allowPrefetchingKmdMigratedSharedAllocation = !!NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get();
37+
}
38+
39+
if (allowPrefetchingKmdMigratedSharedAllocation) {
40+
auto memoryManager = device->getDriverHandle()->getMemoryManager();
41+
if (memoryManager->isKmdMigrationAvailable(device->getRootDeviceIndex()) &&
42+
(allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) {
43+
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
44+
memoryManager->setMemPrefetch(alloc, device->getRootDeviceIndex());
45+
}
46+
}
47+
3448
if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
3549
return ZE_RESULT_SUCCESS;
3650
}

level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,137 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenDebugFlagSetWhenPrefetchApiCal
105105
context->freeMem(ptr);
106106
}
107107

108+
// With no debug flag overridden by this test, appendMemoryPrefetch on a shared
// allocation must succeed without invoking MemoryManager::setMemPrefetch.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiCalledThenDontSetMemPrefetch, IsXeHpcCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto initStatus = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    size_t allocSize = 10;
    size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;

    auto allocStatus = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocSize, allocAlignment, &sharedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, allocStatus);
    EXPECT_NE(nullptr, sharedPtr);

    auto prefetchStatus = commandList->appendMemoryPrefetch(sharedPtr, allocSize);
    EXPECT_EQ(ZE_RESULT_SUCCESS, prefetchStatus);

    // The driver's memory manager is viewed as the mock to read its call-tracking flag.
    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    EXPECT_FALSE(mockMemoryManager->setMemPrefetchCalled);

    context->freeMem(sharedPtr);
}
131+
132+
// Enables the AppendMemoryPrefetchForKmdMigratedSharedAllocations knob but leaves
// UseKmdMigration untouched, and expects setMemPrefetch not to be called.
// NOTE(review): presumably KMD migration is reported unavailable in this
// configuration, which is what "ByDefault" in the test name refers to - confirm.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiCalledThenDontCallSetMemPrefetchByDefault, IsXeHpcCore) {
    DebugManagerStateRestore restore;
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto initStatus = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    size_t allocSize = 10;
    size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;

    auto allocStatus = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocSize, allocAlignment, &sharedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, allocStatus);
    EXPECT_NE(nullptr, sharedPtr);

    auto prefetchStatus = commandList->appendMemoryPrefetch(sharedPtr, allocSize);
    EXPECT_EQ(ZE_RESULT_SUCCESS, prefetchStatus);

    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    EXPECT_FALSE(mockMemoryManager->setMemPrefetchCalled);

    context->freeMem(sharedPtr);
}
158+
159+
// With both the prefetch knob and UseKmdMigration set to 1, appendMemoryPrefetch on a
// shared allocation is expected to call MemoryManager::setMemPrefetch.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedSharedMemoryThenCallSetMemPrefetch, IsXeHpcCore) {
    DebugManagerStateRestore restore;
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
    DebugManager.flags.UseKmdMigration.set(1);

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto initStatus = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    size_t allocSize = 10;
    size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;

    auto allocStatus = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocSize, allocAlignment, &sharedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, allocStatus);
    EXPECT_NE(nullptr, sharedPtr);

    auto prefetchStatus = commandList->appendMemoryPrefetch(sharedPtr, allocSize);
    EXPECT_EQ(ZE_RESULT_SUCCESS, prefetchStatus);

    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    EXPECT_TRUE(mockMemoryManager->setMemPrefetchCalled);

    context->freeMem(sharedPtr);
}
186+
187+
// With both the prefetch knob and UseKmdMigration set to 1, appendMemoryPrefetch on a
// device allocation must still succeed without calling MemoryManager::setMemPrefetch:
// only shared unified memory takes the prefetch path.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedDeviceMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) {
    DebugManagerStateRestore restore;
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
    DebugManager.flags.UseKmdMigration.set(1);

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    // Check the allocation status like the shared-memory tests do, so an allocation
    // failure is reported as such instead of surfacing later as a confusing prefetch error.
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);

    auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    EXPECT_FALSE(memoryManager->setMemPrefetchCalled);

    context->freeMem(ptr);
}
212+
213+
// With both the prefetch knob and UseKmdMigration set to 1, appendMemoryPrefetch on a
// host allocation must still succeed without calling MemoryManager::setMemPrefetch:
// only shared unified memory takes the prefetch path.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedHostMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) {
    DebugManagerStateRestore restore;
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
    DebugManager.flags.UseKmdMigration.set(1);

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_host_mem_alloc_desc_t hostDesc = {};
    // Check the allocation status like the shared-memory tests do, so an allocation
    // failure is reported as such instead of surfacing later as a confusing prefetch error.
    auto res = context->allocHostMem(&hostDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);

    auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    EXPECT_FALSE(memoryManager->setMemPrefetchCalled);

    context->freeMem(ptr);
}
238+
108239
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenCommandBufferIsExhaustedWhenPrefetchApiCalledThenProgramStatePrefetch, IsXeHpcCore) {
109240
using STATE_PREFETCH = typename FamilyType::STATE_PREFETCH;
110241
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4941,6 +4941,16 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemAdviseIsCalledThenUp
49414941
}
49424942
}
49434943

4944+
// DrmMemoryManager::setMemPrefetch on an allocation backed by a single buffer object
// is expected to report success.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemPrefetchIsCalledThenReturnTrue) {
    TestedDrmMemoryManager drmMemoryManager(false, false, false, *executionEnvironment);

    BufferObject bufferObject(mock, 1, 1024, 0);
    DrmAllocation allocation(0, AllocationType::UNIFIED_SHARED_MEMORY, &bufferObject, nullptr, 0u, 0u, MemoryPool::LocalMemory);
    // Sanity check: the allocation really wraps the buffer object created above.
    EXPECT_EQ(&bufferObject, allocation.getBO());

    const bool prefetchSucceeded = drmMemoryManager.setMemPrefetch(&allocation, rootDeviceIndex);
    EXPECT_TRUE(prefetchSucceeded);
}
4953+
49444954
TEST_F(DrmMemoryManagerTest, givenPageFaultIsUnSupportedWhenCallingBindBoOnBufferAllocationThenAllocationShouldNotPageFaultAndExplicitResidencyIsNotRequired) {
49454955
auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
49464956
executionEnvironment->prepareRootDeviceEnvironments(1);

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ UseDrmVirtualEnginesForBcs = -1
389389
LimitEngineCountForVirtualBcs = -1
390390
LimitEngineCountForVirtualCcs = -1
391391
ForceRunAloneContext = -1
392+
AppendMemoryPrefetchForKmdMigratedSharedAllocations = -1
392393
CreateContextWithAccessCounters = -1
393394
AccessCountersTrigger = -1
394395
AccessCountersGranularity = -1

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseDrmVirtualEnginesForBcs, -1, "-1: default, 0:
187187
DECLARE_DEBUG_VARIABLE(int32_t, LimitEngineCountForVirtualBcs, -1, "-1: default, >0 Only use VirtualEngine with limited amount of engines, not max ")
188188
DECLARE_DEBUG_VARIABLE(int32_t, LimitEngineCountForVirtualCcs, -1, "-1: default, >0 Only use VirtualEngine with limited amount of engines, not max ")
189189
DECLARE_DEBUG_VARIABLE(int32_t, CreateContextWithAccessCounters, -1, "-1: default, 0: ignore, 1: create context with Access Counter programming")
190+
DECLARE_DEBUG_VARIABLE(int32_t, AppendMemoryPrefetchForKmdMigratedSharedAllocations, -1, "-1: default, 0: ignore, 1: allow prefetching shared memory to the device associated with the specified command list")
190191
DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersTrigger, -1, "-1: default - disabled, 0: disabled, >= 0: triggering thresholds")
191192
DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersGranularity, -1, "-1: default - ACG_2MB, >= 0: granularites - 0: ACG_128K, 1: ACG_2M, 2: ACG_16M, 3: ACG_16M")
192193
DECLARE_DEBUG_VARIABLE(int32_t, OverridePatIndex, -1, "-1: default, >=0: PatIndex to override")

shared/source/memory_manager/memory_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ class MemoryManager {
219219
virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){};
220220

221221
virtual bool setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) { return true; }
222+
// Prefetch hook for an allocation; this base implementation does no work and reports success.
virtual bool setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) { return true; }
222223

223224
bool isExternalAllocation(AllocationType allocationType);
224225
LocalMemoryUsageBankSelector *getLocalMemoryUsageBankSelector(AllocationType allocationType, uint32_t rootDeviceIndex);

shared/source/os_interface/linux/drm_allocation.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,20 @@ bool DrmAllocation::setMemAdvise(Drm *drm, MemAdviseFlags flags) {
291291
return success;
292292
}
293293

294+
// Issues a VM prefetch request, through the ioctl helper of `drm`, for every non-null
// buffer object backing this allocation.
//
// Every buffer object is attempted even after a failure; the return value is true only
// if all requests succeeded (and trivially true when there is no buffer object).
bool DrmAllocation::setMemPrefetch(Drm *drm) {
    auto ioctlHelper = drm->getIoctlHelper();

    // The target region is the same for every buffer object, so build it once:
    // device memory class in the bits above 16, OR'ed with 0.
    // NOTE(review): the low 16 bits are presumably the memory instance, hard-coded to 0 - confirm.
    const auto region = static_cast<uint32_t>((I915_MEMORY_CLASS_DEVICE << 16u) | 0u);

    bool success = true;
    for (auto bo : bufferObjects) {
        if (bo != nullptr) {
            // `&=` rather than `&&` so a failed request does not skip the remaining ones.
            success &= ioctlHelper->setVmPrefetch(drm, bo->peekAddress(), bo->peekSize(), region);
        }
    }

    return success;
}
307+
294308
void DrmAllocation::registerMemoryToUnmap(void *pointer, size_t size, DrmAllocation::MemoryUnmapFunction unmapFunction) {
295309
this->memoryToUnmap.push_back({pointer, size, unmapFunction});
296310
}

shared/source/os_interface/linux/drm_allocation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class DrmAllocation : public GraphicsAllocation {
8585
void setCachePolicy(CachePolicy memType);
8686

8787
bool setMemAdvise(Drm *drm, MemAdviseFlags flags);
88+
// Issues a VM prefetch via drm's ioctl helper for each non-null buffer object; false if any request fails.
bool setMemPrefetch(Drm *drm);
8889

8990
void *getMmapPtr() { return this->mmapPtr; }
9091
void setMmapPtr(void *ptr) { this->mmapPtr = ptr; }

shared/source/os_interface/linux/drm_memory_manager.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,12 @@ bool DrmMemoryManager::setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdvise
229229
return drmAllocation->setMemAdvise(&this->getDrm(rootDeviceIndex), flags);
230230
}
231231

232+
// Forwards the prefetch request to the DRM allocation behind `gfxAllocation`, using the
// Drm instance that belongs to `rootDeviceIndex`. Returns the allocation's result.
bool DrmMemoryManager::setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) {
    auto &drm = this->getDrm(rootDeviceIndex);
    return static_cast<DrmAllocation *>(gfxAllocation)->setMemPrefetch(&drm);
}
237+
232238
NEO::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size, uint64_t flags, uint32_t rootDeviceIndex) {
233239
drm_i915_gem_userptr userptr = {};
234240
userptr.user_ptr = address;

shared/source/os_interface/linux/drm_memory_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class DrmMemoryManager : public MemoryManager {
6868
bool isKmdMigrationAvailable(uint32_t rootDeviceIndex) override;
6969

7070
bool setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) override;
71+
// Forwards the prefetch request to the DrmAllocation behind gfxAllocation, using the Drm of rootDeviceIndex.
bool setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) override;
7172

7273
std::unique_lock<std::mutex> acquireAllocLock();
7374
std::vector<GraphicsAllocation *> &getSysMemAllocs();

0 commit comments

Comments
 (0)