Skip to content

Commit 9e8a434

Browse files
Add new parameter to dispatch payload data
Change-Id: I0034c5a40de65a050e19691b13793b7053354757
1 parent 5e3df95 commit 9e8a434

File tree

6 files changed

+49
-23
lines changed

6 files changed

+49
-23
lines changed

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#!groovy
22
neoDependenciesRev='800243-1090'
33
strategy='EQUAL'
4-
allowedCD=271
4+
allowedCD=272
55
allowedF=4

runtime/command_queue/gpgpu_walker.inl

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -553,13 +553,15 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
553553

554554
// Send our indirect object data
555555
size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1};
556+
size_t globalWorkSizes[3] = {scheduler.getGws(), 1, 1};
556557

557558
// Create indirectHeap for IOH that is located at the end of device enqueue DSH
558559
size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler);
559560
IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace());
560561
indirectObjectHeap.getSpace(curbeOffset);
561562
ioh = &indirectObjectHeap;
562563

564+
bool localIdsGeneration = KernelCommandsHelper<GfxFamily>::isDispatchForLocalIdsGeneration(1, globalWorkSizes, localWorkSizes);
563565
auto offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendIndirectState(
564566
*commandStream,
565567
*dsh,
@@ -571,7 +573,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
571573
offsetInterfaceDescriptorTable,
572574
interfaceDescriptorIndex,
573575
preemptionMode,
574-
nullptr);
576+
nullptr,
577+
localIdsGeneration);
575578

576579
// Implement enabling special WA DisableLSQCROPERFforOCL if needed
577580
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(commandStream, scheduler, true);

runtime/command_queue/hardware_interface/hardware_interface.inl

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
174174

175175
// Send our indirect object data
176176
size_t localWorkSizes[3] = {lws.x, lws.y, lws.z};
177+
size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z};
177178

178179
dispatchProfilingPerfStartCommands(dispatchInfo, multiDispatchInfo, hwTimeStamps,
179180
hwPerfCounter, commandStream, commandQueue);
@@ -197,6 +198,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
197198

198199
auto idd = obtainInterfaceDescriptorData(pWalkerCmd);
199200

201+
bool localIdsGeneration = KernelCommandsHelper<GfxFamily>::isDispatchForLocalIdsGeneration(dim, globalWorkSizes, localWorkSizes);
200202
auto offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendIndirectState(
201203
*commandStream,
202204
*dsh,
@@ -208,7 +210,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
208210
offsetInterfaceDescriptorTable,
209211
interfaceDescriptorIndex,
210212
preemptionMode,
211-
idd);
213+
idd,
214+
localIdsGeneration);
212215

213216
size_t globalOffsets[3] = {offset.x, offset.y, offset.z};
214217
size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z};
@@ -218,21 +221,24 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
218221

219222
DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0);
220223
setOffsetCrossThreadData(pWalkerCmd, offsetCrossThreadData, interfaceDescriptorIndex);
224+
auto sizeCrossThreadData = kernel.getCrossThreadDataSize();
221225

222-
auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload;
223-
DEBUG_BREAK_IF(nullptr == threadPayload);
226+
size_t sizePerThreadDataTotal = 0;
227+
if (localIdsGeneration) {
228+
auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload;
229+
DEBUG_BREAK_IF(nullptr == threadPayload);
224230

225-
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
226-
auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels);
227-
localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF));
231+
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
232+
auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels);
233+
localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF));
228234

229-
auto sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkSize) * localIdSizePerThread;
230-
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
235+
sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkSize) * localIdSizePerThread;
236+
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
237+
}
231238

232-
auto sizeCrossThreadData = kernel.getCrossThreadDataSize();
233-
auto IndirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
239+
auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
234240
WALKER_TYPE<GfxFamily>::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
235-
pWalkerCmd->setIndirectDataLength(IndirectDataLength);
241+
pWalkerCmd->setIndirectDataLength(indirectDataLength);
236242

237243
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
238244
currentDispatchIndex++;

runtime/helpers/kernel_commands.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
8989
const uint64_t offsetInterfaceDescriptorTable,
9090
const uint32_t interfaceDescriptorIndex,
9191
PreemptionMode preemptionMode,
92-
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor);
92+
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
93+
bool localIdsGeneration);
9394

9495
static size_t getSizeRequiredCS();
9596
static bool isPipeControlWArequired();
@@ -151,5 +152,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
151152
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);
152153

153154
static bool doBindingTablePrefetch();
155+
156+
static bool isDispatchForLocalIdsGeneration(uint32_t workDim, size_t *gws, size_t *lws);
154157
};
155158
} // namespace OCLRT

runtime/helpers/kernel_commands.inl

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -294,7 +294,8 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
294294
const uint64_t offsetInterfaceDescriptorTable,
295295
const uint32_t interfaceDescriptorIndex,
296296
PreemptionMode preemptionMode,
297-
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) {
297+
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
298+
bool localIdsGeneration) {
298299
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
299300

300301
DEBUG_BREAK_IF(simd != 8 && simd != 16 && simd != 32);
@@ -422,4 +423,9 @@ template <typename GfxFamily>
422423
bool KernelCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
423424
return true;
424425
}
426+
// Generic implementation of the local-IDs-generation query for a dispatch.
// The workDim, gws and lws arguments are accepted but not inspected here;
// this version unconditionally returns true.
// In this commit the callers forward the result to sendIndirectState, and
// dispatchWalker only adds the per-thread local-ID size to the indirect data
// length when the result is true.
// NOTE(review): presumably a per-family specialization may override this and
// look at the work sizes; none is visible in this view, so confirm.
427+
template <typename GfxFamily>
428+
bool KernelCommandsHelper<GfxFamily>::isDispatchForLocalIdsGeneration(uint32_t workDim, size_t *gws, size_t *lws) {
429+
return true;
430+
}
425431
} // namespace OCLRT

unit_tests/helpers/kernel_commands_tests.cpp

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -317,7 +317,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, sendIndirectStateResourceUsage)
317317
IDToffset,
318318
0,
319319
pDevice->getPreemptionMode(),
320-
nullptr);
320+
nullptr,
321+
true);
321322

322323
// It's okay these are EXPECT_GE as they're only going to be used for
323324
// estimation purposes to avoid OOM.
@@ -364,7 +365,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWithFourBindingTableE
364365
0,
365366
0,
366367
pDevice->getPreemptionMode(),
367-
nullptr);
368+
nullptr,
369+
true);
368370

369371
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
370372
if (KernelCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
@@ -404,7 +406,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelThatIsSchedulerWhenIn
404406
0,
405407
0,
406408
pDevice->getPreemptionMode(),
407-
nullptr);
409+
nullptr,
410+
true);
408411

409412
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
410413
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
@@ -438,7 +441,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, givenKernelWith100BindingTableEn
438441
0,
439442
0,
440443
pDevice->getPreemptionMode(),
441-
nullptr);
444+
nullptr,
445+
true);
442446

443447
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
444448
if (KernelCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
@@ -503,7 +507,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, whenSendingIndirectStateThenKern
503507
IDToffset,
504508
0,
505509
pDevice->getPreemptionMode(),
506-
nullptr);
510+
nullptr,
511+
true);
507512
size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
508513
numThreads = (numThreads + modifiedKernelInfo.getMaxSimdSize() - 1) / modifiedKernelInfo.getMaxSimdSize();
509514
size_t expectedIohSize = ((modifiedKernelInfo.getMaxSimdSize() == 32) ? 32 : 16) * 3 * numThreads * sizeof(uint16_t);
@@ -575,7 +580,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, usedBindingTableStatePointer) {
575580
0,
576581
0,
577582
pDevice->getPreemptionMode(),
578-
nullptr);
583+
nullptr,
584+
true);
579585

580586
EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0]));
581587
EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1]));
@@ -728,7 +734,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, usedBindingTableStatePointersFor
728734
0,
729735
0,
730736
pDevice->getPreemptionMode(),
731-
nullptr);
737+
nullptr,
738+
true);
732739

733740
bti = reinterpret_cast<typename FamilyType::BINDING_TABLE_STATE *>(reinterpret_cast<unsigned char *>(ssh.getCpuBase()) + localSshOffset + btiOffset);
734741
for (uint32_t i = 0; i < numSurfaces; ++i) {
@@ -962,7 +969,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelCommandsTest, GivenKernelWithSamplersWhenIndir
962969
interfaceDescriptorTableOffset,
963970
0,
964971
pDevice->getPreemptionMode(),
965-
nullptr);
972+
nullptr,
973+
true);
966974

967975
bool isMemorySame = memcmp(borderColorPointer, mockDsh, borderColorSize) == 0;
968976
EXPECT_TRUE(isMemorySame);

0 commit comments

Comments
 (0)