Skip to content

Commit 6d85028

Browse files
Set Uncached MOCS for List only given stateless uncached args
Signed-off-by: Spruit, Neil R <[email protected]>
1 parent a240129 commit 6d85028

File tree

13 files changed

+21
-21
lines changed

13 files changed

+21
-21
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ struct CommandList : _ze_command_list_handle_t {
250250
bool isFlushTaskSubmissionEnabled = false;
251251
bool isSyncModeQueue = false;
252252
bool commandListSLMEnabled = false;
253-
bool requiresUncachedMOCS = false;
253+
bool requiresQueueUncachedMocs = false;
254254

255255
protected:
256256
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
109109

110110
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
111111
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
112-
this->requiresUncachedMOCS = this->containsStatelessUncachedResource;
112+
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
113113
uint32_t partitionCount = 0;
114114

115115
NEO::Device *neoDevice = device->getNEODevice();

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
225225

226226
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
227227
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
228+
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
228229
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
229230

230231
uint32_t partitionCount = 0;

level_zero/core/source/cmdqueue/cmdqueue_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct CommandQueueHw : public CommandQueueImp {
3535

3636
void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) override;
3737

38-
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream);
38+
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed);
3939
size_t estimateStateBaseAddressCmdSize();
4040
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream);
4141

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
8282
auto anyCommandListWithCooperativeKernels = false;
8383
auto anyCommandListWithoutCooperativeKernels = false;
8484

85-
cachedMOCSAllowed = true;
85+
bool cachedMOCSAllowed = true;
8686

8787
for (auto i = 0u; i < numCommandLists; i++) {
8888
auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -100,7 +100,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
100100
anyCommandListWithoutCooperativeKernels = true;
101101
}
102102
// If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue require the uncached MOCS
103-
if (commandList->requiresUncachedMOCS && cachedMOCSAllowed == true) {
103+
if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
104104
cachedMOCSAllowed = false;
105105
}
106106
}
@@ -314,7 +314,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
314314

315315
if (gsbaStateDirty) {
316316
auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
317-
programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child);
317+
programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child, cachedMOCSAllowed);
318318
}
319319

320320
if (initialPreemptionMode) {

level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
namespace L0 {
3131

3232
template <GFXCORE_FAMILY gfxCoreFamily>
33-
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) {
33+
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
3434
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
3535
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
3636

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ struct CommandQueueImp : public CommandQueue {
8181
ze_command_queue_mode_t getSynchronousMode() const;
8282
virtual void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) = 0;
8383
virtual bool getPreemptionCmdProgramming() = 0;
84-
bool cachedMOCSAllowed = true;
8584

8685
protected:
8786
MOCKABLE_VIRTUAL int submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,

level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
namespace L0 {
2222

2323
template <GFXCORE_FAMILY gfxCoreFamily>
24-
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) {
24+
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
2525
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
2626
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
2727
if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {

level_zero/core/source/kernel/kernel_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ struct KernelHw : public KernelImp {
7474
}
7575

7676
if (l3Enabled == false) {
77-
this->kernelRequiresUncachedMocsCount++;
77+
this->kernelRequiresQueueUncachedMocsCount++;
7878
}
7979

8080
NEO::Device *neoDevice = module->getDevice()->getNEODevice();

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ struct KernelImp : Kernel {
119119
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
120120
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }
121121
bool getKernelRequiresUncachedMocs() { return (kernelRequiresUncachedMocsCount > 0); }
122+
bool getKernelRequiresQueueUncachedMocs() { return (kernelRequiresQueueUncachedMocsCount > 0); }
122123
void setKernelArgUncached(uint32_t index, bool val) { isArgUncached[index] = val; }
123124

124125
uint32_t *getGlobalOffsets() override {
@@ -202,6 +203,7 @@ struct KernelImp : Kernel {
202203

203204
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
204205
uint32_t kernelRequiresUncachedMocsCount = false;
206+
uint32_t kernelRequiresQueueUncachedMocsCount = false;
205207
std::vector<bool> isArgUncached;
206208

207209
uint32_t globalOffsets[3] = {};

0 commit comments

Comments
 (0)