Skip to content

Commit 5823450

Browse files
Refactor in queryKernelTimestamps
Change-Id: Icc0731c973fe797946eea06db29b0737ceef8778
1 parent 9f21418 commit 5823450

File tree

6 files changed

+60
-16
lines changed

6 files changed

+60
-16
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,17 +1505,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
15051505
UNRECOVERABLE_IF(!result);
15061506

15071507
Kernel *builtinFunction = nullptr;
1508+
auto useOnlyGlobalTimestamps = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u;
15081509

15091510
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
15101511

15111512
if (pOffsets == nullptr) {
15121513
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
1514+
builtinFunction->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
15131515
} else {
15141516
auto pOffsetAllocationStruct = getAlignedAllocation(this->device, pOffsets, sizeof(size_t) * numEvents);
15151517
auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress());
15161518
commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc);
15171519
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
15181520
builtinFunction->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
1521+
builtinFunction->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
15191522
offsetValPtr += sizeof(size_t);
15201523
}
15211524

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ struct CmdListHelper {
375375
NEO::ResidencyContainer residencyContainer;
376376
ze_group_count_t threadGroupDimensions;
377377
const uint32_t *groupSize = nullptr;
378+
uint32_t useOnlyGlobalTimestamp = std::numeric_limits<uint32_t>::max();
378379
};
379380

380381
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -394,6 +395,20 @@ class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCo
394395
cmdListHelper.groupSize = kernel->getGroupSize();
395396
cmdListHelper.threadGroupDimensions = *pThreadGroupDimensions;
396397

398+
auto kernelName = kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName;
399+
NEO::ArgDescriptor arg;
400+
if (kernelName == "QueryKernelTimestamps") {
401+
arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[2u];
402+
} else if (kernelName == "QueryKernelTimestampsWithOffsets") {
403+
arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[3u];
404+
} else {
405+
return ZE_RESULT_SUCCESS;
406+
}
407+
auto corssThreadData = kernel->getCrossThreadData();
408+
auto element = arg.as<NEO::ArgDescValue>().elements[0];
409+
auto pDst = ptrOffset(corssThreadData, element.offset);
410+
cmdListHelper.useOnlyGlobalTimestamp = *(uint32_t *)(pDst);
411+
397412
return ZE_RESULT_SUCCESS;
398413
}
399414
};
@@ -421,7 +436,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
421436
bool containsDstPtr = false;
422437

423438
for (auto &a : commandList.cmdListHelper.residencyContainer) {
424-
if (a->getGpuAddress() == reinterpret_cast<uint64_t>(alloc)) {
439+
if (a != nullptr && a->getGpuAddress() == reinterpret_cast<uint64_t>(alloc)) {
425440
containsDstPtr = true;
426441
}
427442
}
@@ -433,6 +448,8 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
433448
EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]);
434449
EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]);
435450

451+
EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);
452+
436453
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
437454
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
438455
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);
@@ -464,7 +481,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
464481
bool containsDstPtr = false;
465482

466483
for (auto &a : commandList.cmdListHelper.residencyContainer) {
467-
if (a->getGpuAddress() == reinterpret_cast<uint64_t>(alloc)) {
484+
if (a != nullptr && a->getGpuAddress() == reinterpret_cast<uint64_t>(alloc)) {
468485
containsDstPtr = true;
469486
}
470487
}
@@ -474,7 +491,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
474491
bool containOffsetPtr = false;
475492

476493
for (auto &a : commandList.cmdListHelper.residencyContainer) {
477-
if (a->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAlloc)) {
494+
if (a != nullptr && a->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAlloc)) {
478495
containOffsetPtr = true;
479496
}
480497
}
@@ -486,6 +503,8 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
486503
EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]);
487504
EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]);
488505

506+
EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);
507+
489508
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
490509
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
491510
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);
@@ -528,6 +547,8 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
528547
EXPECT_EQ(groupSizeY, commandList.cmdListHelper.groupSize[1]);
529548
EXPECT_EQ(groupSizeZ, commandList.cmdListHelper.groupSize[2]);
530549

550+
EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);
551+
531552
EXPECT_EQ(static_cast<uint32_t>(eventCount) / groupSizeX, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
532553
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
533554
EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);

opencl/test/unit_test/helpers/kernel_binary_helper_hash_value.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77

88
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
99

10-
const std::string KernelBinaryHelper::BUILT_INS("7020674763881029420");
10+
const std::string KernelBinaryHelper::BUILT_INS("15239427326891676972");

shared/source/built_ins/kernels/copy_kernel_timestamps.builtin_kernel

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*/
77

88
R"===(
9-
__kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst) {
9+
__kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst, uint useOnlyGlobalTimestamps) {
1010
uint gid = get_global_id(0);
1111
const ulong tsMask = (1ull << 32) - 1;
1212
uint currentOffset = gid * 4;
@@ -19,11 +19,16 @@ __kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* d
1919
__global uint *src = (__global uint *) srcPtr;
2020
dst[currentOffset] = src[1] & tsMask;
2121
dst[currentOffset + 1] = src[3] & tsMask;
22-
dst[currentOffset + 2] = src[0] & tsMask;
23-
dst[currentOffset + 3] = src[2] & tsMask;
22+
if (useOnlyGlobalTimestamps != 0) {
23+
dst[currentOffset + 2] = src[1] & tsMask;
24+
dst[currentOffset + 3] = src[3] & tsMask;
25+
} else {
26+
dst[currentOffset + 2] = src[0] & tsMask;
27+
dst[currentOffset + 3] = src[2] & tsMask;
28+
}
2429
}
2530

26-
__kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets) {
31+
__kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets, uint useOnlyGlobalTimestamps) {
2732
uint gid = get_global_id(0);
2833
const ulong tsMask = (1ull << 32) - 1;
2934
uint currentOffset = offsets[gid] / 8;
@@ -36,7 +41,12 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob
3641
__global uint *src = (__global uint *) srcPtr;
3742
dst[currentOffset] = src[1] & tsMask;
3843
dst[currentOffset + 1] = src[3] & tsMask;
39-
dst[currentOffset + 2] = src[0] & tsMask;
40-
dst[currentOffset + 3] = src[2] & tsMask;
44+
if (useOnlyGlobalTimestamps != 0) {
45+
dst[currentOffset + 2] = src[1] & tsMask;
46+
dst[currentOffset + 3] = src[3] & tsMask;
47+
} else {
48+
dst[currentOffset + 2] = src[0] & tsMask;
49+
dst[currentOffset + 3] = src[2] & tsMask;
50+
}
4151
}
4252
)==="

shared/test/unit_test/test_files/7020674763881029420.cl renamed to shared/test/unit_test/test_files/15239427326891676972.cl

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
561561
}
562562
}
563563

564-
__kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst) {
564+
__kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst, uint useOnlyGlobalTimestamps) {
565565
uint gid = get_global_id(0);
566566
const ulong tsMask = (1ull << 32) - 1;
567567
uint currentOffset = gid * 4;
@@ -574,11 +574,16 @@ __kernel void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* d
574574
__global uint *src = (__global uint *) srcPtr;
575575
dst[currentOffset] = src[1] & tsMask;
576576
dst[currentOffset + 1] = src[3] & tsMask;
577-
dst[currentOffset + 2] = src[0] & tsMask;
578-
dst[currentOffset + 3] = src[2] & tsMask;
577+
if (useOnlyGlobalTimestamps != 0) {
578+
dst[currentOffset + 2] = src[1] & tsMask;
579+
dst[currentOffset + 3] = src[3] & tsMask;
580+
} else {
581+
dst[currentOffset + 2] = src[0] & tsMask;
582+
dst[currentOffset + 3] = src[2] & tsMask;
583+
}
579584
}
580585

581-
__kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets) {
586+
__kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets, uint useOnlyGlobalTimestamps) {
582587
uint gid = get_global_id(0);
583588
const ulong tsMask = (1ull << 32) - 1;
584589
uint currentOffset = offsets[gid] / 8;
@@ -591,6 +596,11 @@ __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __glob
591596
__global uint *src = (__global uint *) srcPtr;
592597
dst[currentOffset] = src[1] & tsMask;
593598
dst[currentOffset + 1] = src[3] & tsMask;
594-
dst[currentOffset + 2] = src[0] & tsMask;
595-
dst[currentOffset + 3] = src[2] & tsMask;
599+
if (useOnlyGlobalTimestamps != 0) {
600+
dst[currentOffset + 2] = src[1] & tsMask;
601+
dst[currentOffset + 3] = src[3] & tsMask;
602+
} else {
603+
dst[currentOffset + 2] = src[0] & tsMask;
604+
dst[currentOffset + 3] = src[2] & tsMask;
605+
}
596606
}

shared/test/unit_test/test_files/7020674763881029420_options.txt renamed to shared/test/unit_test/test_files/15239427326891676972_options.txt

File renamed without changes.

0 commit comments

Comments
 (0)