Skip to content

Commit e09ac44

Browse files
Mask bit 0 of timestamp for event profiling
Related-to: LOCI-1161 Signed-off-by: Young Jin Yoon <[email protected]>
1 parent e0da0e1 commit e09ac44

File tree

7 files changed

+126
-32
lines changed

7 files changed

+126
-32
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ struct CommandListCoreFamily : CommandListImp {
211211
const void **pRanges);
212212

213213
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]);
214-
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker);
214+
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb);
215215
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker);
216216
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
217217
void appendSignalEventPostWalker(ze_event_handle_t hEvent);

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,7 +1295,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(ze
12951295
if (!beforeWalker) {
12961296
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
12971297
}
1298-
appendWriteKernelTimestamp(hEvent, beforeWalker);
1298+
appendWriteKernelTimestamp(hEvent, beforeWalker, false);
12991299
}
13001300

13011301
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1441,15 +1441,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
14411441
}
14421442

14431443
// Appends commands that store the REG_GLOBAL_TIMESTAMP_LDW and
// GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW registers into memory at the event's GPU
// address, at the TimestampPacketStorage::Packet offsets of globalStart/contextStart
// when beforeWalker is true and globalEnd/contextEnd otherwise.
// When maskLsb is true each register value is ANDed with `mask` before it is
// stored; when false the registers are stored unmodified.
template <GFXCORE_FAMILY gfxCoreFamily>
1444-
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker) {
1444+
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb) {
1445+
constexpr uint32_t mask = 0xfffffffe; // every bit set except bit 0
14451446
auto event = Event::fromHandle(hEvent);
14461447

14471448
auto baseAddr = event->getGpuAddress();
14481449
auto contextOffset = beforeWalker ? offsetof(TimestampPacketStorage::Packet, contextStart) : offsetof(TimestampPacketStorage::Packet, contextEnd);
14491450
auto globalOffset = beforeWalker ? offsetof(TimestampPacketStorage::Packet, globalStart) : offsetof(TimestampPacketStorage::Packet, globalEnd);
1450-
1451-
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
1452-
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
1451+
if (maskLsb) { // masked path: store (register & mask)
1452+
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, ptrOffset(baseAddr, globalOffset));
1453+
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, ptrOffset(baseAddr, contextOffset));
1454+
} else { // unmasked path: store the register values as-is
1455+
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
1456+
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
1457+
}
14531458
}
14541459

14551460
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1469,14 +1474,14 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
14691474
commandContainer.addToResidencyContainer(&event->getAllocation());
14701475

14711476
if (beforeWalker) {
1472-
appendWriteKernelTimestamp(hEvent, beforeWalker);
1477+
appendWriteKernelTimestamp(hEvent, beforeWalker, true);
14731478
} else {
14741479

14751480
NEO::PipeControlArgs args;
14761481
args.dcFlushEnable = true;
14771482

14781483
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
1479-
appendWriteKernelTimestamp(hEvent, beforeWalker);
1484+
appendWriteKernelTimestamp(hEvent, beforeWalker, true);
14801485

14811486
args.dcFlushEnable = (!event->signalScope) ? false : true;
14821487
if (args.dcFlushEnable) {

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendMemoryFillCalledThenAppen
461461

462462
HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, Platforms) {
463463
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
464-
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
464+
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
465465
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
466466

467467
MockAppendMemoryCopy<gfxCoreFamily> commandList;
@@ -487,26 +487,30 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
487487
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
488488
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
489489
commandList.commandContainer.getCommandStream()->getUsed()));
490-
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
490+
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
491491
EXPECT_NE(cmdList.end(), itor);
492-
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
493-
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
492+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
493+
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
494+
494495
itor++;
496+
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
495497
EXPECT_NE(cmdList.end(), itor);
496-
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
497-
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
498+
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
499+
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
498500

499501
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
500502
EXPECT_NE(cmdList.end(), itor);
501503

502-
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
504+
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
503505
EXPECT_NE(cmdList.end(), itor);
504-
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
505-
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
506+
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
507+
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
508+
506509
itor++;
510+
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
507511
EXPECT_NE(cmdList.end(), itor);
508-
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
509-
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
512+
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
513+
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
510514
}
511515

512516
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenSemaphoreWaitAndPipeControlAreFound, Platforms) {

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
284284
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
285285
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
286286
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
287-
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
287+
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
288288

289289
Mock<::L0::Kernel> kernel;
290290
ze_result_t returnValue;
@@ -312,19 +312,19 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
312312
EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer(
313313
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
314314

315-
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
315+
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
316316
ASSERT_NE(cmdList.end(), itor);
317317
{
318-
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
319-
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getRegisterAddress());
318+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
319+
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getSourceRegisterAddress());
320320
}
321321
itor++;
322322

323-
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
323+
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
324324
ASSERT_NE(cmdList.end(), itor);
325325
{
326-
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
327-
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getRegisterAddress());
326+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
327+
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getSourceRegisterAddress());
328328
}
329329
itor++;
330330

@@ -341,19 +341,19 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
341341
}
342342
itor++;
343343

344-
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
344+
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
345345
ASSERT_NE(cmdList.end(), itor);
346346
{
347-
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
348-
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getRegisterAddress());
347+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
348+
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getSourceRegisterAddress());
349349
}
350350
itor++;
351351

352-
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
352+
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
353353
EXPECT_NE(cmdList.end(), itor);
354354
{
355-
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
356-
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getRegisterAddress());
355+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
356+
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getSourceRegisterAddress());
357357
}
358358

359359
{

shared/source/command_container/command_encoder.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ struct EncodeMath {
114114
AluRegisters firstOperandRegister,
115115
AluRegisters secondOperandRegister,
116116
AluRegisters finalResultRegister);
117+
static void bitwiseAnd(CommandContainer &container,
118+
AluRegisters firstOperandRegister,
119+
AluRegisters secondOperandRegister,
120+
AluRegisters finalResultRegister);
117121
};
118122

119123
template <typename GfxFamily>
@@ -128,6 +132,11 @@ struct EncodeMathMMIO {
128132

129133
static void encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal);
130134

135+
static void encodeBitwiseAndVal(CommandContainer &container,
136+
uint32_t regOffset,
137+
uint32_t immVal,
138+
uint64_t dstAddress);
139+
131140
static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result);
132141

133142
static void encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters regA, AluRegisters regB, AluRegisters finalResultRegister);

shared/source/command_container/command_encoder.inl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,21 @@ void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &contai
133133
EncodeSetMMIO<Family>::encodeREG(container, CS_PREDICATE_RESULT, CS_GPR_R2);
134134
}
135135

136+
/*
137+
* Compute the bitwise AND of the value held in the register at regOffset and immVal,
138+
* and store the result to memory at dstAddress. CS_GPR_R0 and CS_GPR_R1 are
* overwritten with the two operands, and the stored value is read from CS_GPR_R2.
139+
*/
140+
template <typename Family>
141+
void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress) {
142+
EncodeSetMMIO<Family>::encodeREG(container, CS_GPR_R0, regOffset); // CS_GPR_R0 <- register at regOffset
143+
EncodeSetMMIO<Family>::encodeIMM(container, CS_GPR_R1, immVal, true); // CS_GPR_R1 <- immVal; the last argument is encodeIMM's `remap` flag
144+
EncodeMath<Family>::bitwiseAnd(container, AluRegisters::R_0,
145+
AluRegisters::R_1,
146+
AluRegisters::R_2); // presumably R_2 = R_0 AND R_1; encodeAluAnd is not visible here
147+
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(),
148+
CS_GPR_R2, dstAddress); // write CS_GPR_R2 to memory at dstAddress
149+
}
150+
136151
/*
137152
* encodeAlu() performs operations that leave a state including the result of
138153
* an operation such as the carry flag, and the accu flag with subtraction and
@@ -247,6 +262,19 @@ void EncodeMath<Family>::addition(CommandContainer &container,
247262
finalResultRegister);
248263
}
249264

265+
// Reserves command space through commandReserve() and passes it, viewed as
// MI_MATH_ALU_INST_INLINE entries, to EncodeMathMMIO::encodeAluAnd together with
// the two operand registers and the result register.
// NOTE(review): presumably this encodes finalResultRegister =
// firstOperandRegister AND secondOperandRegister; encodeAluAnd is not visible
// here, so confirm against its definition.
template <typename Family>
266+
void EncodeMath<Family>::bitwiseAnd(CommandContainer &container,
267+
AluRegisters firstOperandRegister,
268+
AluRegisters secondOperandRegister,
269+
AluRegisters finalResultRegister) {
270+
uint32_t *cmd = EncodeMath<Family>::commandReserve(container);
271+
272+
EncodeMathMMIO<Family>::encodeAluAnd(reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmd),
273+
firstOperandRegister,
274+
secondOperandRegister,
275+
finalResultRegister);
276+
}
277+
250278
template <typename Family>
251279
inline void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap) {
252280
LriHelper<Family>::program(container.getCommandStream(),

shared/test/unit_test/encoders/test_encode_math.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,54 @@ HWTEST_F(CommandEncoderMathTest, commandReserve) {
169169
static_cast<uint32_t>(NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1));
170170
}
171171

172+
// Checks the command sequence emitted by EncodeMathMMIO::encodeBitwiseAndVal:
// MI_LOAD_REGISTER_REG (regOffset -> CS_GPR_R0), MI_LOAD_REGISTER_IMM
// (immVal -> CS_GPR_R1), MI_MATH, then MI_STORE_REGISTER_MEM (CS_GPR_R2 -> dstAddress).
// NOTE(review): each EXPECT_NE below is non-fatal, yet *itor and the genCmdCast
// results are dereferenced unconditionally afterwards. A missing or unexpected
// command would crash the test instead of failing it; consider ASSERT_NE on the
// iterator and an ASSERT_NE(nullptr, ...) on each cast result.
HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCalledThenContainerHasCorrectMathCommands) {
173+
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
174+
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
175+
using MI_MATH = typename FamilyType::MI_MATH;
176+
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
177+
178+
GenCmdList commands;
179+
CommandContainer cmdContainer;
180+
cmdContainer.initialize(pDevice);
181+
constexpr uint32_t regOffset = 0x2000u; // arbitrary source register offset used as test input
182+
constexpr uint32_t immVal = 0xbaau; // arbitrary immediate operand used as test input
183+
constexpr uint64_t dstAddress = 0xDEADCAF0u; // arbitrary destination address used as test input
184+
EncodeMathMMIO<FamilyType>::encodeBitwiseAndVal(cmdContainer, regOffset, immVal, dstAddress);
185+
186+
CmdParse<FamilyType>::parseCommandBuffer(commands,
187+
ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0),
188+
cmdContainer.getCommandStream()->getUsed()); // NOTE(review): return value is ignored here; other tests on this page wrap the parse in ASSERT_TRUE/EXPECT_TRUE
189+
190+
auto itor = find<MI_LOAD_REGISTER_REG *>(commands.begin(), commands.end());
191+
192+
// first command: copy the register at regOffset into CS_GPR_R0
193+
EXPECT_NE(commands.end(), itor);
194+
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
195+
EXPECT_EQ(cmdLoadReg->getSourceRegisterAddress(), regOffset);
196+
EXPECT_EQ(cmdLoadReg->getDestinationRegisterAddress(), CS_GPR_R0);
197+
198+
// next command: load immVal into CS_GPR_R1
199+
itor++; // the following command is expected to be the MI_LOAD_REGISTER_IMM (no find<> is used)
200+
EXPECT_NE(commands.end(), itor);
201+
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
202+
EXPECT_EQ(cmdLoadImm->getRegisterOffset(), CS_GPR_R1);
203+
EXPECT_EQ(cmdLoadImm->getDataDword(), immVal);
204+
205+
// encodeAluAnd should have its own unit tests, so we only check
206+
// that the MI_MATH exists and its DwordLength field is set to 3u
207+
itor++;
208+
EXPECT_NE(commands.end(), itor);
209+
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
210+
EXPECT_EQ(cmdMath->DW0.BitField.DwordLength, 3u);
211+
212+
// last command: store CS_GPR_R2 to dstAddress
213+
itor++;
214+
EXPECT_NE(commands.end(), itor);
215+
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
216+
EXPECT_EQ(cmdMem->getRegisterAddress(), CS_GPR_R2);
217+
EXPECT_EQ(cmdMem->getMemoryAddress(), dstAddress);
218+
}
219+
172220
HWTEST_F(CommandEncoderMathTest, setGroupSizeIndirect) {
173221
using MI_MATH = typename FamilyType::MI_MATH;
174222
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

0 commit comments

Comments
 (0)