@@ -998,6 +998,85 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsWhenAppendLa
998998
999999 alignedFree (expectedLocalIds);
10001000}
1001+ HWTEST_F (CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsAndHwGeneratedLocalIdsWhenAppendLaunchKernelThenImplicitArgsLocalIdsRespectWalkOrder) {
1002+ std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u );
1003+ auto kernelDescriptor = mockKernelImmData->kernelDescriptor ;
1004+ kernelDescriptor->kernelAttributes .flags .requiresImplicitArgs = true ;
1005+ auto simd = kernelDescriptor->kernelAttributes .simdSize ;
1006+ kernelDescriptor->kernelAttributes .workgroupDimensionsOrder [0 ] = 2 ;
1007+ kernelDescriptor->kernelAttributes .workgroupDimensionsOrder [1 ] = 1 ;
1008+ kernelDescriptor->kernelAttributes .workgroupDimensionsOrder [2 ] = 0 ;
1009+ createModuleFromBinary (0u , false , mockKernelImmData.get ());
1010+
1011+ auto kernel = std::make_unique<MockKernel>(module .get ());
1012+
1013+ ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
1014+ kernel->initialize (&kernelDesc);
1015+ kernel->kernelRequiresGenerationOfLocalIdsByRuntime = false ;
1016+ kernel->requiredWorkgroupOrder = 2 ; // walk order 1 0 2
1017+
1018+ EXPECT_TRUE (kernel->getKernelDescriptor ().kernelAttributes .flags .requiresImplicitArgs );
1019+ ASSERT_NE (nullptr , kernel->getImplicitArgs ());
1020+
1021+ kernel->setGroupSize (4 , 5 , 6 );
1022+ kernel->setGroupCount (3 , 2 , 1 );
1023+ kernel->setGlobalOffsetExp (1 , 2 , 3 );
1024+ kernel->patchGlobalOffset ();
1025+
1026+ ze_result_t result{};
1027+ std::unique_ptr<L0::CommandList> commandList (CommandList::create (productFamily, device, NEO::EngineGroupType::RenderCompute, 0u , result));
1028+
1029+ EXPECT_EQ (ZE_RESULT_SUCCESS, result);
1030+
1031+ auto indirectHeap = commandList->commandContainer .getIndirectHeap (NEO::HeapType::INDIRECT_OBJECT);
1032+ memset (indirectHeap->getSpace (0 ), 0 , kernel->getSizeForImplicitArgsPatching ());
1033+
1034+ ze_group_count_t groupCount{3 , 2 , 1 };
1035+ result = commandList->appendLaunchKernel (kernel->toHandle (), &groupCount, nullptr , 0 , nullptr );
1036+ EXPECT_EQ (ZE_RESULT_SUCCESS, result);
1037+
1038+ auto sizeCrossThreadData = kernel->getCrossThreadDataSize ();
1039+ auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup ();
1040+ EXPECT_EQ (indirectHeap->getUsed (), sizeCrossThreadData + sizePerThreadDataForWholeGroup + kernel->getSizeForImplicitArgsPatching ());
1041+
1042+ ImplicitArgs expectedImplicitArgs{sizeof (ImplicitArgs)};
1043+ expectedImplicitArgs.numWorkDim = 3 ;
1044+ expectedImplicitArgs.simdWidth = simd;
1045+ expectedImplicitArgs.localSizeX = 4 ;
1046+ expectedImplicitArgs.localSizeY = 5 ;
1047+ expectedImplicitArgs.localSizeZ = 6 ;
1048+ expectedImplicitArgs.globalSizeX = 12 ;
1049+ expectedImplicitArgs.globalSizeY = 10 ;
1050+ expectedImplicitArgs.globalSizeZ = 6 ;
1051+ expectedImplicitArgs.globalOffsetX = 1 ;
1052+ expectedImplicitArgs.globalOffsetY = 2 ;
1053+ expectedImplicitArgs.globalOffsetZ = 3 ;
1054+ expectedImplicitArgs.groupCountX = 3 ;
1055+ expectedImplicitArgs.groupCountY = 2 ;
1056+ expectedImplicitArgs.groupCountZ = 1 ;
1057+ expectedImplicitArgs.localIdTablePtr = indirectHeap->getGraphicsAllocation ()->getGpuAddress ();
1058+ expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation ()->getGpuAddress ();
1059+
1060+ auto sizeForImplicitArgPatching = kernel->getSizeForImplicitArgsPatching ();
1061+
1062+ EXPECT_LT (0u , sizeForImplicitArgPatching);
1063+
1064+ auto localIdsProgrammingSize = sizeForImplicitArgPatching - sizeof (ImplicitArgs);
1065+
1066+ auto expectedLocalIds = alignedMalloc (localIdsProgrammingSize, 64 );
1067+ memset (expectedLocalIds, 0 , localIdsProgrammingSize);
1068+ constexpr uint32_t grfSize = sizeof (typename FamilyType::GRF);
1069+ NEO::generateLocalIDs (expectedLocalIds, simd,
1070+ std::array<uint16_t , 3 >{{4 , 5 , 6 }},
1071+ std::array<uint8_t , 3 >{{1 , 0 , 2 }},
1072+ false , grfSize);
1073+
1074+ EXPECT_EQ (0 , memcmp (expectedLocalIds, indirectHeap->getCpuBase (), localIdsProgrammingSize));
1075+ auto pImplicitArgs = reinterpret_cast <ImplicitArgs *>(ptrOffset (indirectHeap->getCpuBase (), localIdsProgrammingSize));
1076+ EXPECT_EQ (0 , memcmp (&expectedImplicitArgs, pImplicitArgs, sizeof (ImplicitArgs)));
1077+
1078+ alignedFree (expectedLocalIds);
1079+ }
10011080HWTEST_F (CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreNotSentToIndirectHeap) {
10021081 std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u );
10031082 auto kernelDescriptor = mockKernelImmData->kernelDescriptor ;
0 commit comments