Skip to content

Commit 3d92186

Browse files
Add heap sharing to immediate command lists
This change is intended to be used in immediate command lists that are using flush task functionality. With this change, all immediate command lists using the same csr will consume shared allocations for dsh and ssh heaps. This will decrease the number of SBA commands dispatched when multiple command lists coexist and dispatch kernels. With this change, a new SBA command should be dispatched only when the current heap allocation is exhausted. The functionality is currently disabled and available under a debug key. It will be enabled by default for all immediate command lists with flush task functionality enabled. Related-To: NEO-7142 Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 71bef60 commit 3d92186

35 files changed

+671
-93
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ struct CommandList : _ze_command_list_handle_t {
297297
bool commandListSLMEnabled = false;
298298
bool requiresQueueUncachedMocs = false;
299299
bool isBcsSplitNeeded = false;
300+
bool immediateCmdListHeapSharing = false;
300301

301302
protected:
302303
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
135135
commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled);
136136
}
137137

138+
if (this->immediateCmdListHeapSharing) {
139+
commandContainer.setImmediateCmdListCsr(this->csr);
140+
commandContainer.setNumIddPerBlock(1);
141+
}
142+
138143
commandContainer.setReservedSshSize(getReserveSshSize());
139144
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
140145
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly());

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
4343
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
4444
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
4545
}
46-
appendEventForProfiling(event, true, false);
4746
const auto kernelImmutableData = kernel->getImmutableData();
47+
if (this->immediateCmdListHeapSharing) {
48+
auto kernelInfo = kernelImmutableData->getKernelInfo();
49+
commandContainer.ensureHeapSizePrepared(
50+
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
51+
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(*kernelInfo));
52+
}
53+
appendEventForProfiling(event, true, false);
4854
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
4955
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
5056
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
@@ -147,7 +153,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
147153
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
148154
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
149155

150-
if (neoDevice->getDebugger()) {
156+
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
151157
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
152158
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
153159
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;

level_zero/core/source/cmdlist/cmdlist_hw_immediate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte;
2121

2222
template <GFXCORE_FAMILY gfxCoreFamily>
2323
struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFamily> {
24+
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
2425
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
25-
using BaseClass::executeCommandListImmediate;
26-
2726
using BaseClass::BaseClass;
27+
using BaseClass::executeCommandListImmediate;
2828

2929
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
3030
const ze_group_count_t *threadGroupDimensions,

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
#pragma once
99

10+
#include "shared/source/command_container/command_encoder.h"
11+
#include "shared/source/command_stream/command_stream_receiver_hw.h"
1012
#include "shared/source/command_stream/wait_status.h"
1113
#include "shared/source/helpers/hw_helper.h"
1214
#include "shared/source/helpers/hw_info.h"
@@ -143,6 +145,32 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
143145
this->csr->makeResident(*this->device->getDebugSurface());
144146
}
145147

148+
NEO::Device *neoDevice = this->device->getNEODevice();
149+
if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) {
150+
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
151+
auto sshStateCopy = csrHw->getSshState();
152+
bool sshDirty = sshStateCopy.updateAndCheck(ssh);
153+
154+
if (sshDirty) {
155+
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
156+
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
157+
158+
NEO::EncodeSurfaceStateArgs args;
159+
args.outMemory = &surfaceState;
160+
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
161+
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
162+
args.mocs = this->device->getMOCS(false, false);
163+
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
164+
args.allocation = this->device->getDebugSurface();
165+
args.gmmHelper = neoDevice->getGmmHelper();
166+
args.useGlobalAtomics = false;
167+
args.areMultipleSubDevicesInContext = false;
168+
args.isDebuggerActive = true;
169+
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
170+
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
171+
}
172+
}
173+
146174
auto completionStamp = this->csr->flushTask(
147175
*commandStream,
148176
commandStreamStart,

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
139139
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
140140
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
141141
}
142+
if (this->immediateCmdListHeapSharing) {
143+
auto kernelInfo = kernelImmutableData->getKernelInfo();
144+
commandContainer.ensureHeapSizePrepared(
145+
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
146+
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredDsh(*kernelInfo));
147+
}
142148
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
143149
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
144150

@@ -265,7 +271,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
265271
}
266272
}
267273

268-
if (neoDevice->getDebugger()) {
274+
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
269275
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
270276
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
271277
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;

level_zero/core/source/cmdlist/cmdlist_imp.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
127127
UNRECOVERABLE_IF(nullptr == csr);
128128

129129
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
130+
commandList->csr = csr;
130131
commandList->internalUsage = internalUsage;
131132
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
132133
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
@@ -135,6 +136,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
135136
if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) {
136137
commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get();
137138
}
139+
commandList->immediateCmdListHeapSharing = L0HwHelper::enableImmediateCmdListHeapSharing(commandList->isFlushTaskSubmissionEnabled);
138140
}
139141
returnValue = commandList->initialize(device, engineGroupType, desc->flags);
140142
if (returnValue != ZE_RESULT_SUCCESS) {
@@ -151,7 +153,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
151153
}
152154

153155
commandList->cmdQImmediate = commandQueue;
154-
commandList->csr = csr;
155156
commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB);
156157
commandList->commandListPreemptionMode = device->getDevicePreemptionMode();
157158

level_zero/core/source/device/device_imp.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1092,7 +1092,9 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool
10921092
device->getSourceLevelDebugger()
10931093
->notifyNewDevice(osInterface ? osInterface->getDriverModel()->getDeviceHandle() : 0);
10941094
}
1095-
device->createSysmanHandle(isSubDevice);
1095+
if (device->getNEODevice()->getAllEngines()[0].commandStreamReceiver->getType() == NEO::CommandStreamReceiverType::CSR_HW) {
1096+
device->createSysmanHandle(isSubDevice);
1097+
}
10961098
device->resourcesReleased = false;
10971099

10981100
device->populateSubDeviceCopyEngineGroups();

level_zero/core/source/hw_helpers/l0_hw_helper.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,12 @@ bool L0HwHelper::enableStateComputeModeTracking() {
3939
return defaultValue;
4040
}
4141

42+
// Decides whether immediate command lists should use the heap-sharing path.
// If the EnableImmediateCmdListHeapSharing debug flag holds anything other
// than -1, the flag value converted to bool is returned; otherwise the result
// is false, i.e. the feature stays off unless the debug flag turns it on.
// NOTE(review): cmdlistSupport is never read in this body, so the caller's
// argument currently has no effect on the result — presumably reserved for
// a future default-on policy; confirm before relying on it.
bool L0HwHelper::enableImmediateCmdListHeapSharing(bool cmdlistSupport) {
43+
bool enabled = false;
44+
if (NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get() != -1) {
45+
return !!NEO::DebugManager.flags.EnableImmediateCmdListHeapSharing.get();
46+
}
47+
return enabled;
48+
}
49+
4250
} // namespace L0

level_zero/core/source/hw_helpers/l0_hw_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class L0HwHelper {
3333
static bool enableFrontEndStateTracking();
3434
static bool enablePipelineSelectStateTracking();
3535
static bool enableStateComputeModeTracking();
36+
static bool enableImmediateCmdListHeapSharing(bool cmdlistSupport);
3637
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
3738
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
3839

0 commit comments

Comments
 (0)