Skip to content

Commit 62e56d2

Browse files
committed
Disable L3 cache when resolving an argument
Change-Id: I4bb3a18d67254eef8aa4a0ce6b29401726f0b47e
1 parent 43a66ad commit 62e56d2

File tree

12 files changed

+54
-23
lines changed

12 files changed

+54
-23
lines changed

runtime/built_ins/built_ins.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ void BuiltInOp<HWFamily, EBuiltInOps::AuxTranslation>::resizeKernelInstances(siz
6464

6565
for (size_t i = convertToNonAuxKernel.size(); i < size; i++) {
6666
auto clonedKernel1 = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr);
67+
clonedKernel1->setDisableL3forStatefulBuffers(true);
6768
auto clonedKernel2 = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr);
6869
clonedKernel1->cloneKernel(baseKernel);
6970
clonedKernel2->cloneKernel(baseKernel);

runtime/gtpin/gtpin_callbacks.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018 Intel Corporation
2+
* Copyright (C) 2018-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -136,7 +136,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
136136
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
137137
cl_mem buffer = (cl_mem)resource;
138138
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
139-
pBuffer->setArgStateful(pSurfaceState, false);
139+
pBuffer->setArgStateful(pSurfaceState, false, false);
140140
}
141141
}
142142

runtime/kernel/kernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1131,7 +1131,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
11311131

11321132
if (requiresSshForBuffers()) {
11331133
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
1134-
buffer->setArgStateful(surfaceState, forceNonAuxMode);
1134+
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3forStatefulBuffers);
11351135
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
11361136
}
11371137
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());

runtime/kernel/kernel.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,10 @@ class Kernel : public BaseObject<_cl_kernel> {
381381
using CacheFlushAllocationsVec = StackVec<GraphicsAllocation *, 32>;
382382
void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const;
383383

384+
// Stores the flag that requests L3 caching to be disabled for this kernel's
// stateful buffer arguments. The stored value is read by Kernel::setArgBuffer,
// which passes it as the disableL3Cache argument of Buffer::setArgStateful
// when it programs the buffer's surface state.
void setDisableL3forStatefulBuffers(bool disableL3forStatefulBuffers) {
385+
this->disableL3forStatefulBuffers = disableL3forStatefulBuffers;
386+
}
387+
384388
protected:
385389
struct ObjectCounts {
386390
uint32_t imageCount;
@@ -479,6 +483,8 @@ class Kernel : public BaseObject<_cl_kernel> {
479483
std::vector<KernelArgHandler> kernelArgHandlers;
480484
std::vector<GraphicsAllocation *> kernelSvmGfxAllocations;
481485

486+
bool disableL3forStatefulBuffers = false;
487+
482488
size_t numberOfBindingTableStates;
483489
size_t localBindingTableOffset;
484490
std::unique_ptr<char[]> pSshLocal;

runtime/mem_obj/buffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ void Buffer::setSurfaceState(const Device *device,
508508
GraphicsAllocation *gfxAlloc,
509509
cl_mem_flags flags) {
510510
auto buffer = Buffer::createBufferHwFromDevice(device, flags, svmSize, svmPtr, svmPtr, gfxAlloc, true, false, false);
511-
buffer->setArgStateful(surfaceState, false);
511+
buffer->setArgStateful(surfaceState, false, false);
512512
buffer->graphicsAllocation = nullptr;
513513
delete buffer;
514514
}

runtime/mem_obj/buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class Buffer : public MemObj {
104104
bool isValidSubBufferOffset(size_t offset);
105105
uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
106106
uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
107-
virtual void setArgStateful(void *memory, bool forceNonAuxMode) = 0;
107+
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) = 0;
108108
bool bufferRectPitchSet(const size_t *bufferOrigin,
109109
const size_t *region,
110110
size_t &bufferRowPitch,
@@ -162,7 +162,7 @@ class BufferHw : public Buffer {
162162
: Buffer(context, flags, size, memoryStorage, hostPtr, gfxAllocation,
163163
zeroCopy, isHostPtrSVM, isObjectRedescribed) {}
164164

165-
void setArgStateful(void *memory, bool forceNonAuxMode) override;
165+
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override;
166166

167167
static Buffer *create(Context *context,
168168
cl_mem_flags flags,

runtime/mem_obj/buffer.inl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ union SURFACE_STATE_BUFFER_LENGTH {
2727
};
2828

2929
template <typename GfxFamily>
30-
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
30+
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) {
3131
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
3232
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
3333
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
@@ -65,9 +65,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
6565
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
6666
surfaceState->setVerticalLineStride(0);
6767
surfaceState->setVerticalLineStrideOffset(0);
68-
if (((isAligned<MemoryConstants::cacheLineSize>(bufferAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferSize)) ||
69-
isValueSet(getFlags(), CL_MEM_READ_ONLY) || !this->isMemObjZeroCopy()) &&
70-
!this->isUncacheable) {
68+
if (!disableL3Cache && ((isAligned<MemoryConstants::cacheLineSize>(bufferAddress) && isAligned<MemoryConstants::cacheLineSize>(bufferSize)) || isValueSet(getFlags(), CL_MEM_READ_ONLY) || !this->isMemObjZeroCopy()) && !this->isUncacheable) {
7169
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
7270
} else {
7371
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));

unit_tests/gen9/kernel_tests_gen9.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ GEN9TEST_F(Gen9KernelCommandsTest, givenBufferThatIsNotZeroCopyWhenSurfaceStatei
4646
auto gmmHelper = context.getDevice(0)->getExecutionEnvironment()->getGmmHelper();
4747
gmmHelper->setSimplifiedMocsTableUsage(true);
4848

49-
buffer->setArgStateful(&surfaceState, false);
49+
buffer->setArgStateful(&surfaceState, false, false);
5050
//make sure proper mocs is selected
5151
constexpr uint32_t expectedMocs = GmmHelper::cacheEnabledIndex;
5252
EXPECT_EQ(expectedMocs, surfaceState.getMemoryObjectControlStateIndexToMocsTables());

unit_tests/helpers/base_object_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -73,7 +73,7 @@ class MockObject : public MockObjectBase<BaseType> {};
7373
template <>
7474
class MockObject<Buffer> : public MockObjectBase<Buffer> {
7575
public:
76-
void setArgStateful(void *memory, bool forceNonAuxMode) override {}
76+
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {}
7777
};
7878

7979
template <>
@@ -277,7 +277,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
277277
MockBuffer() : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) {
278278
}
279279

280-
void setArgStateful(void *memory, bool forceNonAuxMode) override {
280+
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {
281281
}
282282

283283
void setFakeOwnership() {

unit_tests/mem_obj/buffer_tests.cpp

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,7 +1260,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW
12601260
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
12611261
RENDER_SURFACE_STATE surfaceState = {};
12621262

1263-
buffer->setArgStateful(&surfaceState, false);
1263+
buffer->setArgStateful(&surfaceState, false, false);
12641264

12651265
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
12661266
auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();
@@ -1295,7 +1295,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT
12951295
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
12961296
RENDER_SURFACE_STATE surfaceState = {};
12971297

1298-
subBuffer->setArgStateful(&surfaceState, false);
1298+
subBuffer->setArgStateful(&surfaceState, false, false);
12991299

13001300
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
13011301
auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();
@@ -1308,6 +1308,32 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT
13081308
DebugManager.flags.Force32bitAddressing.set(false);
13091309
}
13101310

1311+
// Checks that setArgStateful called with disableL3Cache == true programs the
// surface state with the MOCS returned for
// GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED.
// NOTE(review): "Chache" in the test name looks like a typo for "Cache".
HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisabledIsCalledThenL3CacheShouldBeOff) {
1312+
MockContext context;
1313+
auto size = MemoryConstants::pageSize;
1314+
// Host memory is page-aligned; twice `size` is allocated although the buffer
// below is created with `size` only.
auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize);
1315+
auto retVal = CL_SUCCESS;
1316+
1317+
auto buffer = std::unique_ptr<Buffer>(Buffer::create(
1318+
&context,
1319+
CL_MEM_USE_HOST_PTR,
1320+
size,
1321+
ptr,
1322+
retVal));
1323+
// NOTE(review): EXPECT_EQ is presumably non-fatal here (googletest semantics),
// so a failed create would still reach the buffer-> dereference below.
EXPECT_EQ(CL_SUCCESS, retVal);
1324+
1325+
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
1326+
RENDER_SURFACE_STATE surfaceState = {};
1327+
1328+
// forceNonAuxMode = false, disableL3Cache = true.
buffer->setArgStateful(&surfaceState, false, true);
1329+
1330+
auto mocs = surfaceState.getMemoryObjectControlState();
1331+
// `device` is not declared in this test body; presumably a fixture member.
auto gmmHelper = device->getGmmHelper();
1332+
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs);
1333+
1334+
// NOTE(review): ptr is released while `buffer` (created with
// CL_MEM_USE_HOST_PTR on ptr) is still alive until the end of this scope;
// confirm the Buffer teardown does not touch the host memory.
alignedFree(ptr);
1335+
}
1336+
13111337
HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceStateIsProgrammedThenSetAuxParams) {
13121338
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
13131339
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
@@ -1322,14 +1348,14 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
13221348
buffer->getGraphicsAllocation()->gmm = gmm;
13231349
gmm->isRenderCompressed = true;
13241350

1325-
buffer->setArgStateful(&surfaceState, false);
1351+
buffer->setArgStateful(&surfaceState, false, false);
13261352

13271353
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
13281354
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode());
13291355
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType());
13301356

13311357
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
1332-
buffer->setArgStateful(&surfaceState, false);
1358+
buffer->setArgStateful(&surfaceState, false, false);
13331359
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());
13341360
}
13351361

@@ -1346,7 +1372,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
13461372
buffer->getGraphicsAllocation()->gmm = gmm;
13471373
gmm->isRenderCompressed = false;
13481374

1349-
buffer->setArgStateful(&surfaceState, false);
1375+
buffer->setArgStateful(&surfaceState, false, false);
13501376

13511377
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
13521378
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());

0 commit comments

Comments
 (0)