Skip to content

Commit c51b656

Browse files
fix: request instruction cache invalidation on module destroy
Invalidation is requested on both Linux and Windows, for every CSR that used the ISA allocation. Related-To: NEO-10045 Signed-off-by: Fabian Zwolinski <[email protected]>
1 parent 64175d8 commit c51b656

File tree

12 files changed

+188
-7
lines changed

12 files changed

+188
-7
lines changed

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
135135
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), false);
136136
}
137137

138+
if (this->csr->isInstructionCacheFlushRequired()) {
139+
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForInstructionCacheFlush();
140+
}
141+
138142
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
139143

140144
NEO::LinearStream child(nullptr);
@@ -226,6 +230,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
226230
this->assignCsrTaskCountToFenceIfAvailable(hFence);
227231
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
228232

233+
if (this->csr->isInstructionCacheFlushRequired()) {
234+
NEO::MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(child);
235+
this->csr->setInstructionCacheFlushed();
236+
}
237+
229238
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child);
230239

231240
this->csr->setPreemptionMode(ctx.statePreemption);

level_zero/core/source/module/module_imp.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "level_zero/core/source/module/module_imp.h"
99

10+
#include "shared/source/command_stream/command_stream_receiver.h"
1011
#include "shared/source/compiler_interface/compiler_options.h"
1112
#include "shared/source/compiler_interface/compiler_options_extra.h"
1213
#include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h"
@@ -37,6 +38,7 @@
3738
#include "shared/source/memory_manager/memory_manager.h"
3839
#include "shared/source/memory_manager/memory_operations_handler.h"
3940
#include "shared/source/memory_manager/unified_memory_manager.h"
41+
#include "shared/source/os_interface/os_context.h"
4042
#include "shared/source/program/kernel_info.h"
4143
#include "shared/source/program/program_initialization.h"
4244

@@ -1540,6 +1542,19 @@ ze_result_t ModuleImp::destroy() {
15401542

15411543
auto tempHandle = debugModuleHandle;
15421544
auto tempDevice = device;
1545+
1546+
auto rootDeviceIndex = getDevice()->getNEODevice()->getRootDeviceIndex();
1547+
auto &executionEnvironment = getDevice()->getNEODevice()->getRootDeviceEnvironment().executionEnvironment;
1548+
1549+
for (const auto &kernelImmData : this->kernelImmDatas) {
1550+
for (auto &engine : executionEnvironment.memoryManager->getRegisteredEngines(rootDeviceIndex)) {
1551+
auto contextId = engine.osContext->getContextId();
1552+
if (kernelImmData->getIsaGraphicsAllocation()->isUsedByOsContext(contextId)) {
1553+
engine.commandStreamReceiver->registerInstructionCacheFlush();
1554+
}
1555+
}
1556+
}
1557+
15431558
delete this;
15441559

15451560
if (tempDevice->getL0Debugger() && tempHandle != 0) {

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2022-2023 Intel Corporation
2+
* Copyright (C) 2022-2024 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -837,5 +837,55 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, GivenDirtyFlagForContextInBi
837837
commandQueue->destroy();
838838
}
839839

840+
HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, GivenRegisterInstructionCacheFlushWhenExecutingCmdListsThenInstructionCacheInvalidateIsSent) {
841+
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
842+
ze_command_queue_desc_t queueDesc = {};
843+
ze_result_t returnValue;
844+
845+
neoDevice->getDefaultEngine().commandStreamReceiver->registerInstructionCacheFlush();
846+
847+
queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
848+
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, false, returnValue));
849+
ASSERT_NE(nullptr, commandQueue);
850+
851+
auto usedSpaceBefore = commandQueue->commandStream.getUsed();
852+
853+
ze_command_list_handle_t commandLists[] = {
854+
CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)->toHandle()};
855+
uint32_t numCommandLists = 1;
856+
CommandList::fromHandle(commandLists[0])->close();
857+
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
858+
859+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
860+
861+
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
862+
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
863+
864+
GenCmdList cmdList;
865+
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
866+
cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), 0), usedSpaceAfter));
867+
868+
auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
869+
ASSERT_NE(0u, pipeControls.size());
870+
871+
bool foundInstructionCacheInvalidate = false;
872+
for (auto pipeControlIT : pipeControls) {
873+
auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*pipeControlIT);
874+
if (pipeControl->getInstructionCacheInvalidateEnable()) {
875+
foundInstructionCacheInvalidate = true;
876+
break;
877+
}
878+
}
879+
880+
EXPECT_TRUE(foundInstructionCacheInvalidate);
881+
882+
for (auto i = 0u; i < numCommandLists; i++) {
883+
auto commandList = CommandList::fromHandle(commandLists[i]);
884+
commandList->destroy();
885+
}
886+
887+
commandQueue->destroy();
888+
}
889+
840890
} // namespace ult
841891
} // namespace L0

level_zero/core/test/unit_tests/sources/module/test_module_2.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2022-2023 Intel Corporation
2+
* Copyright (C) 2022-2024 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -9,6 +9,7 @@
99
#include "shared/source/helpers/aligned_memory.h"
1010
#include "shared/source/helpers/file_io.h"
1111
#include "shared/test/common/helpers/test_files.h"
12+
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
1213
#include "shared/test/common/mocks/mock_device.h"
1314
#include "shared/test/common/mocks/mock_modules_zebin.h"
1415
#include "shared/test/common/test_macros/test.h"
@@ -17,7 +18,9 @@
1718
#include "level_zero/core/source/kernel/kernel.h"
1819
#include "level_zero/core/source/module/module_build_log.h"
1920
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
21+
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
2022
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
23+
2124
namespace L0 {
2225
namespace ult {
2326

@@ -75,6 +78,36 @@ TEST_F(ModuleTests, whenCreatingAutoGrfBuildOptionsThenOptionsParsedCorrectly) {
7578
EXPECT_TRUE(NEO::CompilerOptions::contains(internalBuildOptions, NEO::CompilerOptions::autoGrf));
7679
}
7780

81+
TEST(ModuleDestroyTest, givenIsaAllocationWhenIsModuleDestroyedThenRequireInstructionCacheFlushInCsrThatUsedTheAllocation) {
82+
const uint32_t rootDeviceIndex = 0u;
83+
NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
84+
auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, rootDeviceIndex);
85+
86+
MockCommandStreamReceiver *mockCommandStreamReceiver = new MockCommandStreamReceiver(*neoMockDevice->executionEnvironment, neoMockDevice->getRootDeviceIndex(), neoMockDevice->getDeviceBitfield());
87+
mockCommandStreamReceiver->makeResidentParentCall = true;
88+
89+
neoMockDevice->resetCommandStreamReceiver(mockCommandStreamReceiver);
90+
91+
MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());
92+
93+
auto module = new MockModule{&deviceImp, nullptr, ModuleType::user};
94+
module->translationUnit.reset(new MockModuleTranslationUnit{&deviceImp});
95+
96+
auto kernelInfo = new KernelInfo{};
97+
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<const void *>(0xdeadbeef0000);
98+
kernelInfo->heapInfo.kernelHeapSize = static_cast<uint32_t>(0x40);
99+
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
100+
101+
module->initializeKernelImmutableDatas();
102+
auto &kernelImmDatas = module->getKernelImmutableDataVector();
103+
auto csr = deviceImp.getNEODevice()->getEngine(0).commandStreamReceiver;
104+
csr->makeResident(*kernelImmDatas[0]->getIsaParentAllocation());
105+
106+
module->destroy();
107+
108+
EXPECT_TRUE(mockCommandStreamReceiver->requiresInstructionCacheFlush);
109+
}
110+
78111
TEST(ModuleBuildLog, WhenCreatingModuleBuildLogThenNonNullPointerReturned) {
79112
auto moduleBuildLog = ModuleBuildLog::create();
80113
ASSERT_NE(nullptr, moduleBuildLog);

shared/source/command_stream/command_stream_receiver.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,14 @@ class CommandStreamReceiver {
273273

274274
void downloadAllocation(GraphicsAllocation &gfxAllocation);
275275

276+
bool isInstructionCacheFlushRequired() const {
277+
return requiresInstructionCacheFlush;
278+
}
279+
280+
void setInstructionCacheFlushed() {
281+
requiresInstructionCacheFlush = false;
282+
}
283+
276284
void registerInstructionCacheFlush() {
277285
auto mutex = obtainUniqueOwnership();
278286
requiresInstructionCacheFlush = true;

shared/source/command_stream/command_stream_receiver_hw_base.inl

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
304304
flushData.estimatedSize += MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush();
305305
}
306306

307+
if (requiresInstructionCacheFlush) {
308+
flushData.estimatedSize += MemorySynchronizationCommands<GfxFamily>::getSizeForInstructionCacheFlush();
309+
}
310+
307311
auto &csrCommandStream = getCS(flushData.estimatedSize);
308312
flushData.csrStartOffset = csrCommandStream.getUsed();
309313

@@ -312,6 +316,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
312316
MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(csrCommandStream, device.getRootDeviceEnvironment());
313317
}
314318

319+
if (requiresInstructionCacheFlush) {
320+
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(csrCommandStream);
321+
requiresInstructionCacheFlush = false;
322+
}
323+
315324
dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
316325
dispatchImmediateFlushFrontEndCommand(flushData, device, csrCommandStream);
317326
dispatchImmediateFlushStateComputeModeCommand(flushData, csrCommandStream);
@@ -540,9 +549,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
540549
}
541550

542551
if (requiresInstructionCacheFlush) {
543-
PipeControlArgs args;
544-
args.instructionCacheInvalidateEnable = true;
545-
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
552+
MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(commandStreamCSR);
546553
requiresInstructionCacheFlush = false;
547554
}
548555

@@ -942,7 +949,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
942949
}
943950

944951
if (requiresInstructionCacheFlush) {
945-
size += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
952+
size += MemorySynchronizationCommands<GfxFamily>::getSizeForInstructionCacheFlush();
946953
}
947954

948955
if (debugManager.flags.ForcePipeControlPriorToWalker.get()) {

shared/source/helpers/gfx_core_helper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,13 +459,15 @@ struct MemorySynchronizationCommands {
459459
static void addFullCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment);
460460
static void setCacheFlushExtraProperties(PipeControlArgs &args);
461461
static void addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment);
462+
static void addInstructionCacheFlush(LinearStream &commandStream);
462463

463464
static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool tlbInvalidationRequired);
464465
static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment);
465466
static size_t getSizeForSingleBarrier(bool tlbInvalidationRequired);
466467
static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const RootDeviceEnvironment &rootDeviceEnvironment);
467468
static size_t getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment);
468469
static size_t getSizeForAdditonalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment);
470+
static size_t getSizeForInstructionCacheFlush();
469471
static size_t getSizeForFullCacheFlush();
470472

471473
static bool isBarrierWaRequired(const RootDeviceEnvironment &rootDeviceEnvironment);

shared/source/helpers/gfx_core_helper_base.inl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,19 @@ void MemorySynchronizationCommands<GfxFamily>::addStateCacheFlush(LinearStream &
528528
*reinterpret_cast<PIPE_CONTROL *>(commandsBuffer) = cmd;
529529
}
530530

531+
template <typename GfxFamily>
532+
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForInstructionCacheFlush() {
533+
return MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
534+
}
535+
536+
template <typename GfxFamily>
537+
void MemorySynchronizationCommands<GfxFamily>::addInstructionCacheFlush(LinearStream &commandStream) {
538+
PipeControlArgs args;
539+
args.instructionCacheInvalidateEnable = true;
540+
541+
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, args);
542+
}
543+
531544
template <typename GfxFamily>
532545
const StackVec<size_t, 3> GfxCoreHelperHw<GfxFamily>::getDeviceSubGroupSizes() const {
533546
return {8, 16, 32};

shared/test/common/mocks/mock_command_stream_receiver.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
4949
using CommandStreamReceiver::osContext;
5050
using CommandStreamReceiver::ownershipMutex;
5151
using CommandStreamReceiver::preemptionAllocation;
52+
using CommandStreamReceiver::requiresInstructionCacheFlush;
5253
using CommandStreamReceiver::tagAddress;
5354
using CommandStreamReceiver::tagsMultiAllocation;
5455
using CommandStreamReceiver::taskCount;
@@ -183,6 +184,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
183184
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
184185
void makeResident(GraphicsAllocation &gfxAllocation) override {
185186
makeResidentCalledTimes++;
187+
if (makeResidentParentCall) {
188+
return CommandStreamReceiver::makeResident(gfxAllocation);
189+
}
186190
}
187191

188192
std::unique_lock<CommandStreamReceiver::MutexType> obtainHostPtrSurfaceCreationLock() override {
@@ -220,6 +224,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
220224
bool programHardwareContextCalled = false;
221225
bool createPreemptionAllocationReturn = true;
222226
bool createPreemptionAllocationParentCall = false;
227+
bool makeResidentParentCall = false;
223228
bool programComputeBarrierCommandCalled = false;
224229
bool programStallingCommandsForBarrierCalled = false;
225230
std::optional<bool> isGpuHangDetectedReturnValue{};

shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4729,6 +4729,25 @@ HWTEST_F(CommandStreamReceiverHwTest, GivenDirtyFlagForContextInBindlessHelperWh
47294729
EXPECT_FALSE(bindlessHeapsHelperPtr->getStateDirtyForContext(commandStreamReceiver.getOsContext().getContextId()));
47304730
}
47314731

4732+
HWTEST_F(CommandStreamReceiverHwTest, givenRequiresInstructionCacheFlushWhenFlushImmediateThenInstructionCacheInvalidateEnableIsSent) {
4733+
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
4734+
4735+
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
4736+
commandStreamReceiver.registerInstructionCacheFlush();
4737+
4738+
this->requiredStreamProperties.stateComputeMode.setPropertiesAll(false, GrfConfig::defaultGrfNumber, ThreadArbitrationPolicy::AgeBased, NEO::PreemptionMode::ThreadGroup);
4739+
4740+
commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice);
4741+
4742+
HardwareParse hwParserCsr;
4743+
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
4744+
auto pcCmd = hwParserCsr.getCommand<PIPE_CONTROL>();
4745+
ASSERT_NE(nullptr, pcCmd);
4746+
4747+
EXPECT_TRUE(pcCmd->getInstructionCacheInvalidateEnable());
4748+
EXPECT_FALSE(commandStreamReceiver.requiresInstructionCacheFlush);
4749+
}
4750+
47324751
HWTEST_F(CommandStreamReceiverHwTest, GivenFlushIsBlockingWhenFlushTaskCalledThenExpectMonitorFenceFlagTrue) {
47334752
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
47344753
commandStreamReceiver.recordFlusheBatchBuffer = true;

0 commit comments

Comments
 (0)