Skip to content

Commit 583a57c

Browse files
Use defaultQueueSurfaceAddress arg instead of patchToken
Use KernelDescriptor's defaultQueueSurfaceAddress arg instead of storing the SPatchAllocateStatelessDefaultDeviceQueueSurface token in KernelInfo's patchInfo.

Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <[email protected]>
1 parent e9e78e8 commit 583a57c

File tree

13 files changed

+95
-131
lines changed

13 files changed

+95
-131
lines changed

opencl/source/kernel/kernel.cpp

Lines changed: 14 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -343,13 +343,10 @@ cl_int Kernel::initialize() {
343343
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
344344
}
345345

346-
if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
347-
348-
if (requiresSshForBuffers(rootDeviceIndex)) {
349-
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
350-
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
351-
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
352-
}
346+
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) {
347+
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
348+
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful);
349+
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
353350
}
354351

355352
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
@@ -2444,23 +2441,17 @@ void Kernel::provideInitializationHints() {
24442441
}
24452442

24462443
void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
2447-
24482444
auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex();
2449-
const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo;
2450-
if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
2451-
if (kernelDeviceInfos[rootDeviceIndex].crossThreadData) {
2452-
auto patchLocation = ptrOffset(reinterpret_cast<uint32_t *>(getCrossThreadData(rootDeviceIndex)),
2453-
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
2454-
2455-
patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize,
2456-
static_cast<uintptr_t>(devQueue->getQueueBuffer()->getGpuAddressToPatch()));
2457-
}
2458-
if (requiresSshForBuffers(rootDeviceIndex)) {
2459-
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
2460-
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
2461-
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
2462-
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
2463-
}
2445+
const auto &defaultQueueSurfaceAddress = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
2446+
if (isValidOffset(defaultQueueSurfaceAddress.stateless) && kernelDeviceInfos[rootDeviceIndex].crossThreadData) {
2447+
auto patchLocation = ptrOffset(reinterpret_cast<uint32_t *>(getCrossThreadData(rootDeviceIndex)), defaultQueueSurfaceAddress.stateless);
2448+
patchWithRequiredSize(patchLocation, defaultQueueSurfaceAddress.pointerSize,
2449+
static_cast<uintptr_t>(devQueue->getQueueBuffer()->getGpuAddressToPatch()));
2450+
}
2451+
if (isValidOffset(defaultQueueSurfaceAddress.bindful)) {
2452+
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), defaultQueueSurfaceAddress.bindful);
2453+
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
2454+
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
24642455
}
24652456
}
24662457

opencl/source/kernel/kernel.inl

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,6 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf
2424
for (uint32_t i = 0; i < blockCount; i++) {
2525
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
2626

27-
// clang-format off
28-
uint64_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ?
29-
pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset;
30-
uint64_t deviceQueueOffset = ReflectionSurfaceHelper::undefinedOffset;
31-
32-
uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ?
33-
pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize : 0;
34-
uint32_t deviceQueueSize = 0;
35-
// clang-format on
36-
3727
uint64_t printfBufferOffset = ReflectionSurfaceHelper::undefinedOffset;
3828
uint32_t printfBufferPatchSize = 0U;
3929
const auto &printfSurface = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress;
@@ -51,6 +41,17 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf
5141
eventPoolSize = eventPoolSurfaceAddress.pointerSize;
5242
}
5343

44+
uint64_t defaultQueueOffset = ReflectionSurfaceHelper::undefinedOffset;
45+
uint32_t defaultQueueSize = 0U;
46+
const auto &defaultQueueSurface = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
47+
if (isValidOffset(defaultQueueSurface.stateless)) {
48+
defaultQueueOffset = defaultQueueSurface.stateless;
49+
defaultQueueSize = defaultQueueSurface.pointerSize;
50+
}
51+
52+
uint64_t deviceQueueOffset = ReflectionSurfaceHelper::undefinedOffset;
53+
uint32_t deviceQueueSize = 0;
54+
5455
uint64_t privateSurfaceOffset = ReflectionSurfaceHelper::undefinedOffset;
5556
uint32_t privateSurfacePatchSize = 0;
5657
uint64_t privateSurfaceGpuAddress = 0;

opencl/source/program/kernel_info.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -314,11 +314,6 @@ void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfac
314314
patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg;
315315
}
316316

317-
void KernelInfo::storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg) {
318-
usesSsh |= true;
319-
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = pStatelessDefaultDeviceQueueSurfaceArg;
320-
}
321-
322317
void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) {
323318
this->patchInfo.pKernelAttributesInfo = pKernelAttributesInfo;
324319
attributes = reinterpret_cast<const char *>(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo);

opencl/source/program/kernel_info.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ struct KernelInfo {
115115
void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg);
116116
void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg);
117117
void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg);
118-
void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg);
119118
void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo);
120119
void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface);
121120
void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer);

opencl/source/program/kernel_info_from_patchtokens.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,13 +184,6 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
184184
storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface);
185185
storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization);
186186
storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization);
187-
if (nullptr != src.tokens.allocateStatelessEventPoolSurface) {
188-
dst.usesSsh = true;
189-
}
190-
if (nullptr != src.tokens.allocateStatelessPrintfSurface) {
191-
dst.usesSsh = true;
192-
}
193-
storeTokenIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface);
194187
storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer);
195188

196189
dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr);

opencl/source/program/patch_info.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ struct PatchInfo {
6767
const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr;
6868
const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr;
6969
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr;
70-
const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
7170
const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr;
7271
};
7372

opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -454,18 +454,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
454454

455455
pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
456456

457-
const auto &patchInfo = parentKernel->getKernelInfo(rootDeviceIndex).patchInfo;
458-
459-
if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
460-
auto patchLocation = ptrOffset(reinterpret_cast<uint64_t *>(parentKernel->getCrossThreadData(rootDeviceIndex)),
461-
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
457+
const auto &implicitArgs = parentKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs;
462458

459+
const auto &defaultQueueSurfaceAddress = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
460+
if (isValidOffset(defaultQueueSurfaceAddress.stateless)) {
461+
auto patchLocation = ptrOffset(reinterpret_cast<uint64_t *>(parentKernel->getCrossThreadData(rootDeviceIndex)), defaultQueueSurfaceAddress.stateless);
463462
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation);
464463
}
465464

466-
const auto &eventPool = parentKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress;
467-
if (isValidOffset(eventPool.stateless)) {
468-
auto patchLocation = ptrOffset(reinterpret_cast<uint64_t *>(parentKernel->getCrossThreadData(rootDeviceIndex)), eventPool.stateless);
465+
const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress;
466+
if (isValidOffset(eventPoolSurfaceAddress.stateless)) {
467+
auto patchLocation = ptrOffset(reinterpret_cast<uint64_t *>(parentKernel->getCrossThreadData(rootDeviceIndex)), eventPoolSurfaceAddress.stateless);
469468
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation);
470469
}
471470
}
@@ -486,18 +485,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq
486485
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
487486

488487
for (uint32_t i = 0; i < blockCount; i++) {
489-
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
488+
const auto implicitArgs = blockManager->getBlockKernelInfo(i)->kernelDescriptor.payloadMappings.implicitArgs;
490489
const uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i);
491490

492-
uint32_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset;
493-
uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize;
494-
if (defaultQueueSize == sizeof(uint64_t)) {
495-
EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset));
491+
const auto &defaultQueue = implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
492+
if (defaultQueue.pointerSize == sizeof(uint64_t)) {
493+
EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless));
496494
} else {
497-
EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset));
495+
EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueue.stateless));
498496
}
499497

500-
const auto &eventPoolSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress;
498+
const auto &eventPoolSurfaceAddress = implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress;
501499
if (eventPoolSurfaceAddress.pointerSize == sizeof(uint64_t)) {
502500
EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolSurfaceAddress.stateless));
503501
} else {

opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -719,12 +719,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
719719
allocateStatelessEventPoolSurface.DataParamSize = 8;
720720
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessEventPoolSurface);
721721

722-
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
723-
AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 256;
724-
AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 32;
725-
AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8;
726-
727-
pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface;
722+
SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface;
723+
allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 256;
724+
allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 32;
725+
allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8;
726+
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessDefaultDeviceQueueSurface);
728727

729728
// create program with valid context
730729
MockContext context;

opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,13 +1065,14 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK
10651065
}
10661066
}
10671067

1068-
if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
1069-
auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
1070-
if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize == sizeof(uint32_t)) {
1068+
const auto &defaultQueueSurfaceAddress = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress;
1069+
if (isValidOffset(defaultQueueSurfaceAddress.stateless)) {
1070+
auto *patchedPointer = ptrOffset(pCurbe, defaultQueueSurfaceAddress.stateless);
1071+
if (defaultQueueSurfaceAddress.pointerSize == sizeof(uint32_t)) {
10711072
uint32_t *patchedValue = static_cast<uint32_t *>(patchedPointer);
10721073
uint64_t patchedValue64 = *patchedValue;
10731074
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64);
1074-
} else if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize == sizeof(uint64_t)) {
1075+
} else if (defaultQueueSurfaceAddress.pointerSize == sizeof(uint64_t)) {
10751076
uint64_t *patchedValue = static_cast<uint64_t *>(patchedPointer);
10761077
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue);
10771078
}

0 commit comments

Comments
 (0)