Skip to content

Commit ef4fae3

Browse files
Enable TBX mode in level zero
RelatedTo: NEO-4644 Change-Id: I76913d6b7c7d978a5a90a7a574778c67283497c1 Signed-off-by: Mateusz Hoppe <[email protected]>
1 parent 656468e commit ef4fae3

File tree

8 files changed

+57
-26
lines changed

8 files changed

+57
-26
lines changed

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint32_t timeout)
6868
UNRECOVERABLE_IF(csr == nullptr);
6969

7070
auto taskCountToWait = this->taskCount;
71-
72-
waitForTaskCountWithKmdNotifyFallbackHelper(csr, this->taskCount, 0, false, false);
73-
7471
bool enableTimeout = (timeout != std::numeric_limits<uint32_t>::max());
7572
csr->waitForCompletionWithTimeout(enableTimeout, timeout, this->taskCount);
7673

level_zero/core/source/fence/fence.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,6 @@ ze_result_t FenceImp::hostSynchronize(uint32_t timeout) {
9292
return ZE_RESULT_SUCCESS;
9393
}
9494

95-
waitForTaskCountWithKmdNotifyFallbackHelper(cmdQueue->getCsr(), cmdQueue->getTaskCount(), 0, false, false);
96-
9795
if (timeout == 0) {
9896
return queryStatus();
9997
}

level_zero/core/source/hw_helpers/hw_helpers.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,4 @@ namespace L0 {
1515
inline uint64_t getIntermediateCacheSize(const NEO::HardwareInfo &hwInfo) {
1616
return 0u;
1717
}
18-
19-
inline void waitForTaskCountWithKmdNotifyFallbackHelper(NEO::CommandStreamReceiver *csr,
20-
uint32_t taskCountToWait,
21-
NEO::FlushStamp flushStampToWait,
22-
bool useQuickKmdSleep,
23-
bool forcePowerSavingMode) {
24-
}
25-
2618
} // namespace L0

opencl/source/command_stream/tbx_command_stream_receiver_hw.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
3131

3232
uint32_t getMaskAndValueForPollForCompletion() const;
3333
bool getpollNotEqualValueForPollForCompletion() const;
34+
void flushSubmissionsAndDownloadAllocations();
3435

3536
public:
3637
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;
@@ -42,6 +43,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
4243
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
4344

4445
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
46+
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
4547
void downloadAllocation(GraphicsAllocation &gfxAllocation) override;
4648

4749
void processEviction() override;

opencl/source/command_stream/tbx_command_stream_receiver_hw.inl

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ bool TbxCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress,
451451
}
452452

453453
template <typename GfxFamily>
454-
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
454+
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations() {
455455
this->flushBatchedSubmissions();
456456

457457
while (*this->getTagAddress() < this->latestFlushedTaskCount) {
@@ -462,10 +462,20 @@ void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallbac
462462
downloadAllocation(*graphicsAllocation);
463463
}
464464
this->allocationsForDownload.clear();
465+
}
465466

467+
// TBX override of the task-count wait.
// Runs flushSubmissionsAndDownloadAllocations() first, then forwards every
// argument unchanged to the base-class implementation.
template <typename GfxFamily>
468+
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
469+
// Same pre-wait step that waitForCompletionWithTimeout() performs.
flushSubmissionsAndDownloadAllocations();
466470
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
467471
}
468472

473+
// TBX override of the timed completion wait.
// Runs flushSubmissionsAndDownloadAllocations() first, then delegates to the
// base-class wait with the same arguments.
// Returns whatever BaseClass::waitForCompletionWithTimeout() returns.
template <typename GfxFamily>
474+
bool TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
475+
// Same pre-wait step that waitForTaskCountWithKmdNotifyFallback() performs.
flushSubmissionsAndDownloadAllocations();
476+
return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
477+
}
478+
469479
template <typename GfxFamily>
470480
void TbxCommandStreamReceiverHw<GfxFamily>::processEviction() {
471481
this->allocationsForDownload.insert(this->getEvictionAllocations().begin(), this->getEvictionAllocations().end());

opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -396,17 +396,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingMakeSurfacePackNonResi
396396
}
397397

398398
HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKmdNotifyFallbackThenTagAllocationAndScheduledAllocationsAreDownloaded) {
399-
struct MockTbxCsr : TbxCommandStreamReceiverHw<FamilyType> {
400-
using CommandStreamReceiver::latestFlushedTaskCount;
401-
using TbxCommandStreamReceiverHw<FamilyType>::TbxCommandStreamReceiverHw;
402-
void downloadAllocation(GraphicsAllocation &gfxAllocation) override {
403-
*reinterpret_cast<uint32_t *>(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount;
404-
downloadedAllocations.insert(&gfxAllocation);
405-
}
406-
std::set<GraphicsAllocation *> downloadedAllocations;
407-
};
408-
409-
MockTbxCsr tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()};
399+
MockTbxCsrRegisterDownloadedAllocations<FamilyType> tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()};
410400
MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
411401
uint32_t tag = 0u;
412402
tbxCsr.setupContext(osContext);
@@ -430,6 +420,32 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKm
430420
EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size());
431421
}
432422

423+
// Checks that waitForCompletionWithTimeout() on a TBX CSR flushes batched
// submissions, downloads the tag allocation and every allocation queued in
// allocationsForDownload, and leaves that queue empty afterwards.
HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForCompletionWithTimeoutThenFlushIsCalledAndTagAllocationAndScheduledAllocationsAreDownloaded) {
424+
MockTbxCsrRegisterDownloadedAllocations<FamilyType> tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()};
425+
MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
426+
// Tag value starts at 0, below latestFlushedTaskCount (set to 1 below).
uint32_t tag = 0u;
427+
tbxCsr.setupContext(osContext);
428+
tbxCsr.setTagAllocation(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), false, sizeof(tag)}, &tag));
429+
tbxCsr.latestFlushedTaskCount = 1u;
430+
431+
MockGraphicsAllocation allocation1, allocation2, allocation3;
432+
// Give each allocation a residency task count so isResident(0u) holds (asserted below).
allocation1.usageInfos[0].residencyTaskCount = 1;
433+
allocation2.usageInfos[0].residencyTaskCount = 1;
434+
allocation3.usageInfos[0].residencyTaskCount = 1;
435+
ASSERT_TRUE(allocation1.isResident(0u));
436+
ASSERT_TRUE(allocation2.isResident(0u));
437+
ASSERT_TRUE(allocation3.isResident(0u));
438+
439+
// Queue the three allocations for download.
tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3};
440+
441+
// Arguments are enableTimeout=true, timeoutMicroseconds=0, taskCountToWait=0.
tbxCsr.waitForCompletionWithTimeout(true, 0, 0);
442+
443+
// The tag allocation is expected in the downloaded set as well.
// NOTE(review): presumably it is downloaded by the tag-polling loop in the CSR helper; confirm.
std::set<GraphicsAllocation *> expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3};
444+
EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations);
445+
EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size());
446+
EXPECT_TRUE(tbxCsr.flushBatchedSubmissionsCalled);
447+
}
448+
433449
HWTEST_F(TbxCommandSteamSimpleTest, whenTbxCommandStreamReceiverIsCreatedThenPPGTTAndGGTTCreatedHavePhysicalAddressAllocatorSet) {
434450
MockTbxCsr<FamilyType> tbxCsr(*pDevice->executionEnvironment);
435451

opencl/test/unit_test/mocks/mock_tbx_csr.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,20 @@ class MockTbxCsr : public TbxCommandStreamReceiverHw<GfxFamily> {
7070
bool makeCoherentCalled = false;
7171
bool dumpAllocationCalled = false;
7272
};
73+
74+
// Test double for TbxCommandStreamReceiverHw.
// Records which allocations were passed to downloadAllocation() and whether
// flushBatchedSubmissions() was called.
template <typename GfxFamily>
75+
struct MockTbxCsrRegisterDownloadedAllocations : TbxCommandStreamReceiverHw<GfxFamily> {
76+
// Make latestFlushedTaskCount accessible to tests.
using CommandStreamReceiver::latestFlushedTaskCount;
77+
// Inherit the base-class constructors.
using TbxCommandStreamReceiverHw<GfxFamily>::TbxCommandStreamReceiverHw;
78+
// Writes latestFlushedTaskCount into the tag allocation's buffer, then
// records the allocation in downloadedAllocations.
void downloadAllocation(GraphicsAllocation &gfxAllocation) override {
79+
*reinterpret_cast<uint32_t *>(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount;
80+
downloadedAllocations.insert(&gfxAllocation);
81+
}
82+
// Records the call and returns true; the base implementation is not invoked.
bool flushBatchedSubmissions() override {
83+
flushBatchedSubmissionsCalled = true;
84+
return true;
85+
}
86+
// Every allocation seen by downloadAllocation().
std::set<GraphicsAllocation *> downloadedAllocations;
87+
// True once flushBatchedSubmissions() has been called.
bool flushBatchedSubmissionsCalled = false;
88+
};
7389
} // namespace NEO

shared/source/command_stream/command_stream_receiver.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ class CommandStreamReceiver {
129129
bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; }
130130

131131
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
132-
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
132+
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
133133
virtual void downloadAllocation(GraphicsAllocation &gfxAllocation){};
134134

135135
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }

0 commit comments

Comments
 (0)