Skip to content

Commit cf3817e

Browse files
Add debug flag for EOT WA
EOT WA requires allocating the last 64KB of the kernel heap and putting the EOT signature at the last 16 bytes of the kernel heap.
Related-To: NEO-7099
Signed-off-by: Mateusz Jablonski <[email protected]>
1 parent 9a66730 commit cf3817e

File tree

21 files changed

+271
-12
lines changed

21 files changed

+271
-12
lines changed

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,25 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingThenScratchAllocationI
500500
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation));
501501
}
502502

503+
// Checks that an allocation registered with addAdditionalAllocationForResidency()
// is handled by flushTask() the same way as the tag allocation: after one flush
// the mock CSR must report both as made resident and as made non-resident.
// NOTE(review): `allocation` is a stack local and its address remains registered
// with the CSR after this test body returns — confirm nothing flushes this CSR
// during fixture teardown.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenAdditionalAllocationForResidencyWhenFlushingThenHandleResidency) {
// The raw `new` pointer is handed to resetCommandStreamReceiver() right below;
// presumably the device takes ownership of it — TODO confirm.
504+
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
505+
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
506+
507+
MockGraphicsAllocation allocation{};
508+
commandStreamReceiver->addAdditionalAllocationForResidency(&allocation);
509+
510+
flushTask(*commandStreamReceiver);
511+
// Stop the test here if there is no tag allocation, before it is used in the checks below.
512+
auto tagAllocation = commandStreamReceiver->getTagAllocation();
513+
ASSERT_NE(tagAllocation, nullptr);
514+
515+
EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation));
516+
EXPECT_TRUE(commandStreamReceiver->isMadeResident(&allocation));
517+
518+
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation));
519+
EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(&allocation));
520+
}
521+
503522
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndFlushTaskIsCalledThenFenceAllocationIsMadeResident) {
504523
RAIIHwHelperFactory<MockHwHelperWithFenceAllocation<FamilyType>> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily};
505524

shared/source/command_stream/command_stream_receiver.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,9 @@ class CommandStreamReceiver {
337337
// Returns the value of this command stream receiver's dispatchMode member.
DispatchMode getDispatchMode() const {
338338
return this->dispatchMode;
339339
}
340+
// Registers an extra allocation with this command stream receiver by appending
// the pointer to additionalAllocationsForResidency.
// CommandStreamReceiverHw::flushTask() calls makeResident() on every stored entry
// and flushBatchedSubmissions() appends every stored entry to its surfaces for submit.
// graphicsAllocation must not be null: flushTask() dereferences each stored pointer.
// NOTE(review): only the raw pointer is stored and no removal path is visible here,
// so the allocation presumably has to outlive this CSR's flushes — confirm with callers.
void addAdditionalAllocationForResidency(GraphicsAllocation *graphicsAllocation) {
341+
this->additionalAllocationsForResidency.push_back(graphicsAllocation);
342+
}
340343

341344
LogicalStateHelper *getLogicalStateHelper() const;
342345

@@ -389,6 +392,7 @@ class CommandStreamReceiver {
389392
GraphicsAllocation *perDssBackedBuffer = nullptr;
390393
GraphicsAllocation *clearColorAllocation = nullptr;
391394
GraphicsAllocation *workPartitionAllocation = nullptr;
395+
// Allocations added through addAdditionalAllocationForResidency(); flushTask() makes
// each one resident and flushBatchedSubmissions() adds each to the surfaces for submit.
// NOTE(review): the `1` template argument is presumably the inline capacity, sized for
// the single EOT WA allocation the device registers — TODO confirm.
StackVec<GraphicsAllocation *, 1> additionalAllocationsForResidency;
392396

393397
MultiGraphicsAllocation *tagsMultiAllocation = nullptr;
394398

shared/source/command_stream/command_stream_receiver_hw_base.inl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
515515

516516
this->makeResident(*tagAllocation);
517517

518+
for (auto &additionalAllocationForResidency : additionalAllocationsForResidency) {
519+
this->makeResident(*additionalAllocationForResidency);
520+
}
521+
518522
if (globalFenceAllocation) {
519523
makeResident(*globalFenceAllocation);
520524
}
@@ -772,6 +776,9 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
772776
for (auto &surface : resourcePackage) {
773777
surfacesForSubmit.push_back(surface);
774778
}
779+
for (auto &additionalAllocationForResidency : additionalAllocationsForResidency) {
780+
surfacesForSubmit.push_back(additionalAllocationForResidency);
781+
}
775782

776783
// make sure we flush DC if needed
777784
if (epiloguePipeControlLocation && MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintMemoryRegionSizes, false, "print memory bank t
7474
DECLARE_DEBUG_VARIABLE(bool, UpdateCrossThreadDataSize, false, "Turn on cross thread data size calculation for PATCH TOKEN binary")
7575
DECLARE_DEBUG_VARIABLE(bool, UseNewQueryTopoIoctl, true, "Use DRM_I915_QUERY_COMPUTE_SLICES")
7676
DECLARE_DEBUG_VARIABLE(bool, DisableGpuHangDetection, false, "Disable GPU hang detection")
77+
// EnableEotWa: when set, Device::createDeviceImpl() requests a 64KB KERNEL_ISA allocation
// at internal heap base + 4GB - 64KB and, after engine creation, copies a 16-byte EOT
// pattern into it; GfxPartition::init() sizes the internal heap as 4GB - 64KB instead of
// gfxHeap32Size.
DECLARE_DEBUG_VARIABLE(bool, EnableEotWa, false, "Enable WA to program EOT instruction at the end of kernel heap")
7778
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
7879
DECLARE_DEBUG_VARIABLE(std::string, FilterDeviceId, std::string("unk"), "Device id filter, adapter matching device id will be opened. Ignored when unk.")
7980
DECLARE_DEBUG_VARIABLE(std::string, FilterBdfPath, std::string("unk"), "Linux-only, BDF path filter, only matching paths will be opened. Ignored when unk.")

shared/source/device/device.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ Device::~Device() {
5959

6060
syncBufferHandler.reset();
6161
commandStreamReceivers.clear();
62+
if (kernelEotWaAllocation) {
63+
executionEnvironment->memoryManager->freeGraphicsMemory(kernelEotWaAllocation);
64+
}
6265
executionEnvironment->memoryManager->waitForDeletions();
6366

6467
executionEnvironment->decRefInternal();
@@ -215,10 +218,22 @@ bool Device::createDeviceImpl() {
215218
if (getDebugger() && hwHelper.disableL3CacheForDebug(hwInfo)) {
216219
getGmmHelper()->forceAllResourcesUncached();
217220
}
221+
if (DebugManager.flags.EnableEotWa.get()) {
222+
AllocationProperties allocationProperties{rootDeviceIndex, MemoryConstants::pageSize64k, AllocationType::KERNEL_ISA, deviceBitfield};
223+
auto memoryManager = executionEnvironment->memoryManager.get();
224+
auto heapBase = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, memoryManager->isLocalMemoryUsedForIsa(rootDeviceIndex));
225+
allocationProperties.gpuAddress = heapBase + 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k;
226+
kernelEotWaAllocation = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
227+
}
218228

219229
if (!createEngines()) {
220230
return false;
221231
}
232+
if (kernelEotWaAllocation) {
233+
auto memoryManager = executionEnvironment->memoryManager.get();
234+
uint8_t eotMemoryPattern[]{0x31, 0x09, 0x0C, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0C, 0x7F, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00};
235+
memoryManager->copyMemoryToAllocation(kernelEotWaAllocation, MemoryConstants::pageSize64k - sizeof(eotMemoryPattern) - MemoryConstants::pageSize, eotMemoryPattern, sizeof(eotMemoryPattern));
236+
}
222237

223238
getDefaultEngine().osContext->setDefaultContext(true);
224239

@@ -373,6 +388,11 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
373388
addEngineToEngineGroup(engine);
374389
}
375390

391+
if (kernelEotWaAllocation) {
392+
if (!EngineHelpers::isBcs(engineType)) {
393+
commandStreamReceiver->addAdditionalAllocationForResidency(kernelEotWaAllocation);
394+
}
395+
}
376396
commandStreamReceivers.push_back(std::move(commandStreamReceiver));
377397

378398
return true;

shared/source/device/device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ class Device : public ReferenceTrackedObject<Device> {
202202

203203
uintptr_t specializedDevice = reinterpret_cast<uintptr_t>(nullptr);
204204

205+
GraphicsAllocation *kernelEotWaAllocation = nullptr;
205206
GraphicsAllocation *rtMemoryBackedBuffer = nullptr;
206207
std::vector<GraphicsAllocation *> rtDispatchGlobals;
207208
struct {

shared/source/device/root_device.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2021 Intel Corporation
2+
* Copyright (C) 2019-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -67,6 +67,9 @@ void RootDevice::initializeRootCommandStreamReceiver() {
6767
rootCommandStreamReceiver->initializeTagAllocation();
6868
rootCommandStreamReceiver->createGlobalFenceAllocation();
6969
rootCommandStreamReceiver->createWorkPartitionAllocation(*this);
70+
if (kernelEotWaAllocation) {
71+
rootCommandStreamReceiver->addAdditionalAllocationForResidency(kernelEotWaAllocation);
72+
}
7073
commandStreamReceivers.push_back(std::move(rootCommandStreamReceiver));
7174

7275
EngineControl engine{commandStreamReceivers.back().get(), osContext};

shared/source/memory_manager/gfx_partition.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2021 Intel Corporation
2+
* Copyright (C) 2019-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -237,7 +237,11 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe
237237
heapInitExternalWithFrontWindow(HeapAssigner::mapExternalWindowIndex(heap), heapAllocate(heap, externalFrontWindowSize),
238238
externalFrontWindowSize);
239239
} else if (HeapAssigner::isInternalHeap(heap)) {
240-
heapInitWithFrontWindow(heap, gfxBase, gfxHeap32Size, GfxPartition::internalFrontWindowPoolSize);
240+
auto heapSize = gfxHeap32Size;
241+
if (DebugManager.flags.EnableEotWa.get()) {
242+
heapSize = 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k;
243+
}
244+
heapInitWithFrontWindow(heap, gfxBase, heapSize, GfxPartition::internalFrontWindowPoolSize);
241245
heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), gfxBase, GfxPartition::internalFrontWindowPoolSize);
242246
} else {
243247
heapInit(heap, gfxBase, gfxHeap32Size);

shared/source/memory_manager/memory_manager.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,8 +434,9 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
434434
}
435435

436436
allocationData.hostPtr = hostPtr;
437-
if (properties.allocationType == AllocationType::KERNEL_ISA ||
438-
properties.allocationType == AllocationType::KERNEL_ISA_INTERNAL) {
437+
if ((properties.allocationType == AllocationType::KERNEL_ISA ||
438+
properties.allocationType == AllocationType::KERNEL_ISA_INTERNAL) &&
439+
properties.gpuAddress == 0) {
439440
allocationData.size = properties.size + hwHelper.getPaddingForISAAllocation();
440441
} else {
441442
allocationData.size = properties.size;

shared/source/os_interface/linux/drm_memory_manager.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,13 @@ DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemoryImpl(const Allocatio
617617
size_t alignedAllocationSize = alignUp(allocationData.size, MemoryConstants::pageSize);
618618
auto allocationSize = alignedAllocationSize;
619619
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
620-
auto gpuVA = gfxPartition->heapAllocate(allocatorToUse, allocationSize);
620+
uint64_t gpuVA;
621621

622+
if (GraphicsAllocation::isIsaAllocationType(allocationData.type) && allocationData.gpuAddress != 0) {
623+
gpuVA = allocationData.gpuAddress;
624+
} else {
625+
gpuVA = gfxPartition->heapAllocate(allocatorToUse, allocationSize);
626+
}
622627
if (!gpuVA) {
623628
return nullptr;
624629
}
@@ -1403,9 +1408,15 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
14031408

14041409
auto sizeAllocated = sizeAligned;
14051410
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
1406-
auto gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo,
1407-
allocationData.type, gfxPartition, sizeAllocated,
1408-
allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow, *gmmHelper);
1411+
uint64_t gpuAddress;
1412+
1413+
if (GraphicsAllocation::isIsaAllocationType(allocationData.type) && allocationData.gpuAddress != 0) {
1414+
gpuAddress = allocationData.gpuAddress;
1415+
} else {
1416+
gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo,
1417+
allocationData.type, gfxPartition, sizeAllocated,
1418+
allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow, *gmmHelper);
1419+
}
14091420
auto canonizedGpuAddress = gmmHelper->canonize(gpuAddress);
14101421
auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, canonizedGpuAddress, sizeAligned, MemoryPool::LocalMemory);
14111422
DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(allocation.get());

0 commit comments

Comments
 (0)