Skip to content

Commit 6151482

Browse files
performance: optimize surface state programming
- eliminate read-modify-write on gfx memory when global bindless mode is enabled

Signed-off-by: Mateusz Hoppe <[email protected]>
Source: d5e275c
1 parent 215fbda commit 6151482

File tree

2 files changed

+29
-3
lines changed

2 files changed

+29
-3
lines changed

level_zero/core/source/kernel/patch_with_implicit_surface.inl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef<uint8_t> c
4242
auto &gfxCoreHelper = device.getGfxCoreHelper();
4343
void *surfaceStateAddress = nullptr;
4444
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
45+
bool useTempBuffer = false;
4546

4647
if (NEO::isValidOffset(ptr.bindless)) {
4748
if (device.getBindlessHeapsHelper()) {
@@ -50,6 +51,7 @@ inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef<uint8_t> c
5051
auto patchLocation = ptrOffset(crossThreadData.begin(), ptr.bindless);
5152
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(ssInHeap.surfaceStateOffset));
5253
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
54+
useTempBuffer = true;
5355
} else {
5456
auto index = std::numeric_limits<uint32_t>::max();
5557
const auto &iter = kernelDescriptor.getBindlessOffsetToSurfaceState().find(ptr.bindless);
@@ -64,12 +66,18 @@ inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef<uint8_t> c
6466
}
6567

6668
if (surfaceStateAddress) {
69+
std::unique_ptr<uint64_t[]> surfaceState;
70+
71+
if (useTempBuffer) {
72+
surfaceState = std::make_unique<uint64_t[]>(surfaceStateSize / sizeof(uint64_t));
73+
}
74+
6775
auto addressToPatch = allocation->getGpuAddress();
6876
size_t sizeToPatch = allocation->getUnderlyingBufferSize();
6977
auto isDebuggerActive = device.getDebugger() != nullptr;
7078

7179
NEO::EncodeSurfaceStateArgs args;
72-
args.outMemory = surfaceStateAddress;
80+
args.outMemory = useTempBuffer ? surfaceState.get() : surfaceStateAddress;
7381
args.graphicsAddress = addressToPatch;
7482
args.size = sizeToPatch;
7583
args.mocs = gfxCoreHelper.getMocsIndex(*device.getGmmHelper(), true, false) << 1;
@@ -81,5 +89,10 @@ inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef<uint8_t> c
8189
args.isDebuggerActive = isDebuggerActive;
8290

8391
gfxCoreHelper.encodeBufferSurfaceState(args);
92+
93+
if (useTempBuffer) {
94+
memcpy_s(surfaceStateAddress, surfaceStateSize,
95+
surfaceState.get(), surfaceStateSize);
96+
}
8497
}
8598
}

level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic
595595
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
596596
savedSurfaceStateArgs = args;
597597
++encodeBufferSurfaceStateCalled;
598+
NEO::GfxCoreHelperHw<FamilyType>::encodeBufferSurfaceState(args);
598599
}
599600
};
600601

@@ -651,6 +652,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic
651652
}
652653

653654
HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExplicitAndImplicitArgsAndBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
655+
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
654656
HardwareInfo hwInfo = *defaultHwInfo;
655657

656658
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
@@ -667,6 +669,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl
667669
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
668670
savedSurfaceStateArgs = args;
669671
++encodeBufferSurfaceStateCalled;
672+
NEO::GfxCoreHelperHw<FamilyType>::encodeBufferSurfaceState(args);
670673
}
671674
};
672675

@@ -724,12 +727,17 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExpl
724727
EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
725728
EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);
726729

727-
EXPECT_EQ(globalConstBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
730+
EXPECT_NE(globalConstBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
731+
732+
const auto surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(globalConstBuffer.getBindlessInfo().ssPtr);
733+
ASSERT_NE(nullptr, surfState);
734+
EXPECT_EQ(gpuAddress, surfState->getSurfaceBaseAddress());
728735
EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation);
729736
}
730737
}
731738

732739
HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplicitAndImplicitArgsAndBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
740+
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
733741
HardwareInfo hwInfo = *defaultHwInfo;
734742

735743
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
@@ -746,6 +754,7 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic
746754
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
747755
savedSurfaceStateArgs = args;
748756
++encodeBufferSurfaceStateCalled;
757+
NEO::GfxCoreHelperHw<FamilyType>::encodeBufferSurfaceState(args);
749758
}
750759
};
751760

@@ -803,7 +812,11 @@ HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplic
803812
EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
804813
EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);
805814

806-
EXPECT_EQ(globalVarBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
815+
EXPECT_NE(globalVarBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
816+
817+
const auto surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(globalVarBuffer.getBindlessInfo().ssPtr);
818+
ASSERT_NE(nullptr, surfState);
819+
EXPECT_EQ(gpuAddress, surfState->getSurfaceBaseAddress());
807820
EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation);
808821
}
809822
}

0 commit comments

Comments
 (0)