Skip to content

Commit 248e955

Browse files
Optimize pipe control (PC) commands dispatched during appendMemory*() calls.
Also optimizes the pipe control (PC) issued after the walker for timestamp events.
Related-To: LOCI-1995
Signed-off-by: Vinod Tipparaju <[email protected]>
1 parent 3df6110 commit 248e955

File tree

4 files changed

+47
-40
lines changed

4 files changed

+47
-40
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -524,6 +524,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
524524
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
525525
hEvent, numWaitEvents, phWaitEvents);
526526

527+
auto event = Event::fromHandle(hEvent);
528+
if (event) {
529+
allocationStruct.needsFlush &= !event->signalScope;
530+
}
531+
527532
if (allocationStruct.needsFlush) {
528533
NEO::PipeControlArgs args(true);
529534
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
@@ -1017,6 +1022,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
10171022
return result;
10181023
}
10191024

1025+
auto event = Event::fromHandle(hSignalEvent);
1026+
if (event) {
1027+
dstAllocationStruct.needsFlush &= !event->signalScope;
1028+
}
1029+
10201030
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
10211031
NEO::PipeControlArgs args(true);
10221032
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
@@ -1309,6 +1319,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
13091319

13101320
appendEventForProfilingAllWalkers(hSignalEvent, false);
13111321

1322+
auto event = Event::fromHandle(hSignalEvent);
1323+
if (event) {
1324+
hostPointerNeedsFlush &= !event->signalScope;
1325+
}
1326+
13121327
if (hostPointerNeedsFlush) {
13131328
NEO::PipeControlArgs args(true);
13141329
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
@@ -1567,15 +1582,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
15671582
} else {
15681583

15691584
NEO::PipeControlArgs args = {};
1570-
args.dcFlushEnable = true;
1585+
args.dcFlushEnable = (!event->signalScope) ? false : true;
15711586

15721587
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
15731588
appendWriteKernelTimestamp(hEvent, beforeWalker, true);
1574-
1575-
args.dcFlushEnable = (!event->signalScope) ? false : true;
1576-
if (args.dcFlushEnable) {
1577-
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
1578-
}
15791589
}
15801590
}
15811591
}
@@ -1714,10 +1724,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
17141724
return appendResult;
17151725
}
17161726

1717-
if (hSignalEvent) {
1718-
CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
1719-
}
1720-
17211727
return ZE_RESULT_SUCCESS;
17221728
}
17231729

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp

Lines changed: 24 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1190,30 +1190,42 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
11901190
commandList.commandContainer.getCommandStream()->getUsed()));
11911191
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
11921192
EXPECT_NE(cmdList.end(), itor);
1193-
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1194-
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
1193+
{
1194+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1195+
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
1196+
}
11951197

11961198
itor++;
11971199
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
11981200
EXPECT_NE(cmdList.end(), itor);
1199-
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1200-
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
1201+
{
1202+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1203+
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
1204+
}
12011205

12021206
itor++;
12031207
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
12041208
EXPECT_NE(cmdList.end(), itor);
1209+
{
1210+
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
1211+
EXPECT_FALSE(cmd->getDcFlushEnable());
1212+
}
12051213

12061214
itor++;
12071215
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
12081216
EXPECT_NE(cmdList.end(), itor);
1209-
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1210-
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
1217+
{
1218+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1219+
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
1220+
}
12111221

12121222
itor++;
12131223
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
12141224
EXPECT_NE(cmdList.end(), itor);
1215-
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1216-
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
1225+
{
1226+
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
1227+
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
1228+
}
12171229

12181230
auto temp = itor;
12191231
auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
@@ -1222,8 +1234,10 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
12221234

12231235
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
12241236
EXPECT_NE(cmdList.end(), itor);
1225-
auto cmd1 = genCmdCast<PIPE_CONTROL *>(*itor);
1226-
EXPECT_TRUE(cmd1->getDcFlushEnable());
1237+
{
1238+
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
1239+
EXPECT_TRUE(cmd->getDcFlushEnable());
1240+
}
12271241
}
12281242
} // namespace ult
12291243
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020 Intel Corporation
2+
* Copyright (C) 2020-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -513,6 +513,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithS
513513
ze_event_desc_t eventDesc = {};
514514
eventDesc.index = 0;
515515
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
516+
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
516517
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
517518
events.push_back(event.get());
518519
eventDesc.index = 1;
@@ -547,9 +548,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithS
547548
EXPECT_NE(cmdList.end(), itor);
548549
itor++;
549550
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
550-
EXPECT_NE(cmdList.end(), itor);
551-
itor++;
552-
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
553551
EXPECT_EQ(cmdList.end(), itor);
554552

555553
device->getDriverHandle()->freeMem(dst_buffer);
@@ -576,8 +574,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
576574

577575
ze_event_desc_t eventDesc = {};
578576
eventDesc.index = 0;
579-
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
580-
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
577+
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_DEVICE;
578+
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
581579
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
582580
events.push_back(event.get());
583581
eventDesc.index = 1;
@@ -604,9 +602,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
604602
itor++;
605603
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
606604
EXPECT_NE(cmdList.end(), itor);
607-
itor++;
608-
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
609-
EXPECT_NE(cmdList.end(), itor);
610605
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
611606
EXPECT_TRUE(cmd->getDcFlushEnable());
612607

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -358,17 +358,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
358358
}
359359
itor++;
360360

361-
auto temp = itor;
362-
auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
363-
//we should have only one PC with dcFlush added
364-
ASSERT_EQ(1u, numPCs.size());
365-
366-
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
367-
ASSERT_NE(cmdList.end(), itor);
368-
{
369-
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
370-
EXPECT_TRUE(cmd->getDcFlushEnable());
371-
}
361+
auto numPCs = findAll<PIPE_CONTROL *>(itor, cmdList.end());
362+
//we should not have PC when signal scope is device
363+
ASSERT_EQ(0u, numPCs.size());
372364

373365
{
374366
auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),

0 commit comments

Comments
 (0)