Skip to content

Commit be90b9f

Browse files
Jaime Arteaga authored and Compute-Runtime-Automation committed
Add support for ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED
Add support for device and shared allocations that use the ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED flag, whether the kernel using the memory is stateless or stateful. Signed-off-by: Jaime Arteaga <[email protected]>
1 parent ce53035 commit be90b9f

File tree

15 files changed

+518
-30
lines changed

15 files changed

+518
-30
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ struct CommandList : _ze_command_list_handle_t {
186186
virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType) = 0;
187187
virtual ~CommandList();
188188
NEO::CommandContainer commandContainer;
189+
// Reports the current value of the containsStatelessUncachedResource flag of this command list.
bool getContainsStatelessUncachedResource() {
    return containsStatelessUncachedResource;
}
189190

190191
protected:
191192
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
@@ -197,6 +198,7 @@ struct CommandList : _ze_command_list_handle_t {
197198
bool internalUsage = false;
198199
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
199200
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, size_t *offset);
201+
// Flag recording that this command list contains a stateless uncached resource.
// Starts out false; appendLaunchKernelWithParams ORs in the launched kernel's
// getKernelRequiresUncachedMocs() result, and reset() sets it back to false.
bool containsStatelessUncachedResource = false;
200202
};
201203

202204
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
6666
removeDeallocationContainerData();
6767
removeHostPtrAllocations();
6868
commandContainer.reset();
69+
containsStatelessUncachedResource = false;
6970

7071
if (!isCopyOnly()) {
7172
programStateBaseAddress(commandContainer, true);

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "shared/source/memory_manager/residency_container.h"
1717
#include "shared/source/unified_memory/unified_memory.h"
1818

19+
#include "level_zero/core/source/kernel/kernel_imp.h"
20+
1921
#include "pipe_control_args.h"
2022

2123
#include <algorithm>
@@ -70,14 +72,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
7072
this->indirectAllocationsAllowed = true;
7173
}
7274

75+
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
76+
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
77+
7378
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer,
7479
reinterpret_cast<const void *>(pThreadGroupDimensions),
7580
isIndirect,
7681
isPredicate,
7782
kernel,
7883
0,
7984
device->getNEODevice(),
80-
commandListPreemptionMode);
85+
commandListPreemptionMode,
86+
this->containsStatelessUncachedResource);
8187

8288
if (device->getNEODevice()->getDebugger()) {
8389
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);

level_zero/core/source/kernel/kernel_hw.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#pragma once
99

1010
#include "shared/source/command_container/command_encoder.h"
11+
#include "shared/source/gmm_helper/gmm.h"
12+
#include "shared/source/gmm_helper/gmm_helper.h"
1113
#include "shared/source/helpers/bindless_heaps_helper.h"
1214
#include "shared/source/helpers/hw_helper.h"
1315
#include "shared/source/helpers/string.h"
@@ -56,7 +58,13 @@ struct KernelHw : public KernelImp {
5658
bufferSizeForSsh += sizeTillEndOfSurface; // take address alignment offset into account
5759
bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment);
5860

59-
auto mocs = this->module->getDevice()->getMOCS(true, false);
61+
bool l3Enabled = true;
62+
auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
63+
if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
64+
l3Enabled = false;
65+
}
66+
auto mocs = this->module->getDevice()->getMOCS(l3Enabled, false);
67+
6068
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
6169
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs,
6270
false, false, false, neoDevice->getNumAvailableDevices(),

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,11 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
475475
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg, val);
476476
if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) {
477477
setBufferSurfaceState(argIndex, reinterpret_cast<void *>(val), allocation);
478+
} else {
479+
auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(allocation->getGpuAddress()));
480+
if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
481+
kernelRequiresUncachedMocs = true;
482+
}
478483
}
479484
residencyContainer[argIndex] = allocation;
480485

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ struct KernelImp : Kernel {
103103

104104
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
105105
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }
106+
// Reports the current value of the kernelRequiresUncachedMocs flag of this kernel.
bool getKernelRequiresUncachedMocs() {
    return kernelRequiresUncachedMocs;
}
106107

107108
protected:
108109
KernelImp() = default;
@@ -147,6 +148,7 @@ struct KernelImp : Kernel {
147148
uint32_t requiredWorkgroupOrder = 0u;
148149

149150
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
151+
// Set to true by setArgBufferWithAlloc when a buffer argument has neither a valid bindful
// nor a valid bindless offset and its SVM allocation data carries the
// locallyUncachedResource flag.
// NOTE(review): nothing visible here clears the flag once set — confirm whether re-binding
// the argument to a cached allocation should reset it.
bool kernelRequiresUncachedMocs = false;
150152
};
151153

152154
} // namespace L0

level_zero/core/source/memory/memory.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ ze_result_t DriverHandleImp::allocDeviceMem(ze_device_handle_t hDevice, const ze
164164
Device::fromHandle(hDevice)->getNEODevice()->getDeviceBitfield());
165165
unifiedMemoryProperties.allocationFlags.flags.shareable = 1u;
166166
unifiedMemoryProperties.device = Device::fromHandle(hDevice)->getNEODevice();
167+
168+
if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
169+
unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1;
170+
}
171+
167172
void *usmPtr =
168173
svmAllocsManager->createUnifiedMemoryAllocation(Device::fromHandle(hDevice)->getRootDeviceIndex(),
169174
size, unifiedMemoryProperties);
@@ -191,6 +196,10 @@ ze_result_t DriverHandleImp::allocSharedMem(ze_device_handle_t hDevice, const ze
191196
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, Device::fromHandle(device)->getNEODevice()->getDeviceBitfield());
192197
unifiedMemoryProperties.device = unifiedMemoryPropertiesDevice;
193198

199+
if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
200+
unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1;
201+
}
202+
194203
if (size > this->devices[0]->getDeviceInfo().maxMemAllocSize) {
195204
*ptr = nullptr;
196205
return ZE_RESULT_ERROR_UNSUPPORTED_SIZE;

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,20 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlus
394394
EXPECT_NE(cmdList.end(), itor);
395395
}
396396

397+
// Creates a compute command list, resets it, and checks that the
// stateless-uncached-resource flag reads false afterwards.
// NOTE(review): the flag is initialised to false, so this does not yet prove that reset()
// clears a flag that was previously set — consider setting it before calling reset().
HWTEST_F(CommandListCreate, whenCommandListIsResetThenContainsStatelessUncachedResourceIsSetToFalse) {
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily,
                                                                     device,
                                                                     NEO::EngineGroupType::Compute,
                                                                     returnValue));
    // Abort on creation failure: every statement below dereferences commandList.
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList.get());

    returnValue = commandList->reset();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_FALSE(commandList->getContainsStatelessUncachedResource());
}
410+
397411
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenResetThenStateBaseAddressNotProgrammed) {
398412
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
399413

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*
66
*/
77

8+
#include "shared/source/gmm_helper/gmm.h"
9+
#include "shared/source/gmm_helper/gmm_helper.h"
810
#include "shared/source/helpers/state_base_address.h"
911
#include "shared/source/os_interface/device_factory.h"
1012
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
@@ -154,6 +156,28 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
154156
commandQueue->destroy();
155157
}
156158

159+
// Programs the general state base address with the uncached-resource argument set to true
// and compares the stateless data-port MOCS read from a STATE_BASE_ADDRESS command against
// the MOCS for GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED.
// NOTE(review): the command is read from space newly obtained from the queue's
// commandStream, while programming targets the separate `child` stream — confirm that this
// inspects the command that was actually programmed.
HWTEST2_F(CommandQueueProgramSBATest,
          whenProgrammingStateBaseAddressWithcontainsStatelessUncachedResourceThenCorrectMocsAreSet, CommandQueueSBASupport) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    ze_command_queue_desc_t queueDesc = {};
    std::unique_ptr<NEO::CommandStreamReceiver> csr(neoDevice->createCommandStreamReceiver());
    auto *commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, csr.get(), &queueDesc);
    commandQueue->initialize(false, false);

    // Carve a 4 KiB window out of the queue's stream to receive the programming.
    uint32_t alignedSize = 4096u;
    void *childBuffer = commandQueue->commandStream->getSpace(alignedSize);
    NEO::LinearStream child(childBuffer, alignedSize);

    commandQueue->programGeneralStateBaseAddress(0u, true, child);

    void *sbaSpace = commandQueue->commandStream->getSpace(sizeof(STATE_BASE_ADDRESS));
    auto pSbaCmd = static_cast<STATE_BASE_ADDRESS *>(sbaSpace);
    uint32_t statelessMocsIndex = pSbaCmd->getStatelessDataPortAccessMemoryObjectControlState();

    uint32_t expectedMocs = device->getNEODevice()->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    EXPECT_EQ(statelessMocsIndex, expectedMocs);

    // The queue was created with raw new; destroy() is its matching release call in this test.
    commandQueue->destroy();
}
180+
157181
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingNonCopyBlitCommandListThenWrongCommandListStatusReturned) {
158182
const ze_command_queue_desc_t desc = {};
159183

level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,58 @@ TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetA
759759

760760
EXPECT_FALSE(mockKernel.setSurfaceStateCalled);
761761
}
762+
763+
using KernelBindlessUncachedMemoryTests = Test<ModuleFixture>;
764+
765+
// Passes a default-constructed mock allocation (presumably unknown to the SVM allocations
// manager) to an argument with neither a bindful nor a bindless offset, and checks that the
// kernel's uncached-MOCS flag stays false.
// NOTE(review): the test name ends in "IsSet", yet the assertion expects the flag to be
// unset; the name is kept unchanged here so existing test filters keep working.
TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet) {
    MyMockKernel mockKernel;
    mockKernel.module = module.get();

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();
    mockKernel.initialize(&kernelDesc);

    // Invalidate both offsets so setArgBufferWithAlloc skips the surface-state path.
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    NEO::MockGraphicsAllocation mockAllocation;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &mockAllocation);

    EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
}
782+
783+
// Allocates device memory with ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED, binds its graphics
// allocation to an argument with neither a bindful nor a bindless offset, and checks that
// the kernel reports that it requires uncached MOCS.
TEST_F(KernelBindlessUncachedMemoryTests, givenDeviceAllocationWithUncachedFlagThenKernelRequiresUncachedMocsIsSet) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;

    mockKernel.module = module.get();
    mockKernel.initialize(&desc);

    // Invalidate both offsets so setArgBufferWithAlloc skips the surface-state path.
    auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    arg.bindless = undefined<CrossThreadDataOffset>;
    arg.bindful = undefined<SurfaceStateHeapOffset>;

    void *devicePtr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
    ze_result_t res = device->getDriverHandle()->allocDeviceMem(device->toHandle(),
                                                                &deviceDesc,
                                                                16384u,
                                                                0u,
                                                                &devicePtr);
    // Fatal assertions: the lookups below dereference the results, so continuing after a
    // failure would crash instead of reporting a clean test failure.
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr);
    ASSERT_NE(nullptr, svmData);

    auto alloc = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, alloc);

    mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
    EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());

    device->getDriverHandle()->freeMem(devicePtr);
}
813+
762814
template <GFXCORE_FAMILY gfxCoreFamily>
763815
struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
764816
//MyMockImage() : WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>();

0 commit comments

Comments
 (0)