Skip to content

Commit ea8aa29

Browse files
Change pollForCompletion() insertion locations
Poll is done on:
- Aub CSR destruction
- expectMemory
- blocking calls

Poll is not done on flush

Change-Id: I1a776a932cb608c01f0de249e7cef26b00147f31
Signed-off-by: Maciej Dziuban <[email protected]>
1 parent e1eab52 commit ea8aa29

File tree

8 files changed

+240
-89
lines changed

8 files changed

+240
-89
lines changed

runtime/command_stream/aub_command_stream_receiver_hw.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "runtime/memory_manager/page_table.h"
1616
#include "runtime/memory_manager/physical_address_allocator.h"
1717
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
18+
#include "runtime/utilities/spinlock.h"
1819

1920
namespace OCLRT {
2021

@@ -62,6 +63,8 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
6263
// Family specific version
6364
MOCKABLE_VIRTUAL void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
6465
MOCKABLE_VIRTUAL void pollForCompletion();
66+
void pollForCompletionImpl();
67+
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
6568

6669
uint32_t getDumpHandle();
6770
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
@@ -115,5 +118,8 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
115118

116119
bool dumpAubNonWritable = false;
117120
ExternalAllocationsContainer externalAllocations;
121+
122+
uint32_t pollForCompletionTaskCount = 0u;
123+
SpinLock pollForCompletionLock;
118124
};
119125
} // namespace OCLRT

runtime/command_stream/aub_command_stream_receiver_hw.inl

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include <algorithm>
3434
#include <cstring>
35+
#include "aub_command_stream_receiver_hw.h"
3536

3637
namespace OCLRT {
3738

@@ -89,6 +90,9 @@ AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const Hardware
8990

9091
// Destructor: when osContext is non-null, calls pollForCompletion() first,
// then calls freeEngineInfoTable().
// NOTE(review): the osContext guard presumably covers receivers that are
// destroyed without a context ever having been set up — confirm.
template <typename GfxFamily>
9192
AUBCommandStreamReceiverHw<GfxFamily>::~AUBCommandStreamReceiverHw() {
93+
if (osContext) {
94+
pollForCompletion();
95+
}
9296
freeEngineInfoTable();
9397
}
9498

@@ -342,10 +346,6 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
342346

343347
submitBatchBuffer(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
344348

345-
if (!DebugManager.flags.AUBDumpConcurrentCS.get()) {
346-
pollForCompletion();
347-
}
348-
349349
if (this->standalone) {
350350
*this->tagAddress = this->peekLatestSentTaskCount();
351351
}
@@ -561,6 +561,17 @@ void AUBCommandStreamReceiverHw<GfxFamily>::submitBatchBuffer(uint64_t batchBuff
561561

562562
// Guarded entry point for polling. Holds pollForCompletionLock for the whole
// call, returns immediately when pollForCompletionTaskCount already equals
// taskCount, and otherwise forwards to pollForCompletionImpl().
template <typename GfxFamily>
563563
void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion() {
564+
const auto lock = std::unique_lock<decltype(pollForCompletionLock)>{pollForCompletionLock};
565+
if (this->pollForCompletionTaskCount == this->taskCount) {
566+
return; // already polled for the current taskCount
567+
}
568+
pollForCompletionImpl();
569+
}
570+
571+
template <typename GfxFamily>
572+
void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
573+
this->pollForCompletionTaskCount = this->taskCount;
574+
564575
if (hardwareContext) {
565576
hardwareContext->pollForCompletion();
566577
return;
@@ -578,6 +589,12 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletion() {
578589
AubMemDump::CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort);
579590
}
580591

592+
// Override that first delegates to
// CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback
// with the same four arguments, and then calls pollForCompletion().
template <typename GfxFamily>
593+
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
594+
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
595+
pollForCompletion();
596+
}
597+
581598
template <typename GfxFamily>
582599
constexpr uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() {
583600
return 0x100;
@@ -691,6 +708,8 @@ void AUBCommandStreamReceiverHw<GfxFamily>::expectMemoryNotEqual(void *gfxAddres
691708
template <typename GfxFamily>
692709
void AUBCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress, const void *srcAddress,
693710
size_t length, uint32_t compareOperation) {
711+
pollForCompletion();
712+
694713
if (hardwareContext) {
695714
hardwareContext->expectMemory(reinterpret_cast<uint64_t>(gfxAddress), srcAddress, length, compareOperation);
696715
}
@@ -846,4 +865,5 @@ template <typename GfxFamily>
846865
// Always returns AubMemDump::AddressSpaceValues::TraceNonlocal; the entryBits
// argument is not read by this implementation.
int AUBCommandStreamReceiverHw<GfxFamily>::getAddressSpaceFromPTEBits(uint64_t entryBits) const {
847866
return AubMemDump::AddressSpaceValues::TraceNonlocal;
848867
}
868+
849869
} // namespace OCLRT

unit_tests/aub_tests/command_stream/aub_command_stream_fixture.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,17 @@ class AUBCommandStreamFixture : public CommandStreamFixture {
7777
}
7878

7979
// Returns the AUB command stream receiver used by the fixture. Starts from
// pCommandStreamReceiver; when testMode is TestMode::AubTestsWithTbx, it
// instead takes the aubCSR member of the
// CommandStreamReceiverWithAUBDump<TbxCommandStreamReceiverHw<FamilyType>>
// that pCommandStreamReceiver is cast to. The result is reinterpret_cast to
// AUBCommandStreamReceiverHw<FamilyType> *.
// NOTE(review): reinterpret_cast performs no type check — presumably the
// fixture guarantees the dynamic type in both modes; confirm.
template <typename FamilyType>
80-
void pollForCompletion() {
80+
AUBCommandStreamReceiverHw<FamilyType> *getAubCsr() {
8181
CommandStreamReceiver *csr = pCommandStreamReceiver;
8282
if (testMode == TestMode::AubTestsWithTbx) {
8383
csr = reinterpret_cast<CommandStreamReceiverWithAUBDump<TbxCommandStreamReceiverHw<FamilyType>> *>(pCommandStreamReceiver)->aubCSR;
8484
}
85+
return reinterpret_cast<AUBCommandStreamReceiverHw<FamilyType> *>(csr);
86+
}
8587

86-
auto aubCsr = reinterpret_cast<AUBCommandStreamReceiverHw<FamilyType> *>(csr);
87-
aubCsr->pollForCompletion();
88+
// Fixture helper: calls pollForCompletion() on the receiver returned by
// getAubCsr<FamilyType>().
template <typename FamilyType>
89+
void pollForCompletion() {
90+
getAubCsr<FamilyType>()->pollForCompletion();
8891
}
8992

9093
GraphicsAllocation *createResidentAllocationAndStoreItInCsr(const void *address, size_t size) {

unit_tests/aub_tests/command_stream/aub_command_stream_tests.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct AUBFixture : public AUBCommandStreamFixture,
5858
ResidencyContainer allocationsForResidency;
5959
pCommandStreamReceiver->flush(batchBuffer, allocationsForResidency);
6060

61+
AUBCommandStreamFixture::getAubCsr<FamilyType>()->pollForCompletionImpl();
6162
auto engineType = pCommandStreamReceiver->getOsContext().getEngineType();
6263
auto mmioBase = CommandStreamReceiverSimulatedCommonHw<FamilyType>::getCsTraits(engineType.type).mmioBase;
6364
AUBCommandStreamFixture::expectMMIO<FamilyType>(AubMemDump::computeRegisterOffset(mmioBase, 0x2094), noopId);
@@ -75,6 +76,7 @@ HWTEST_F(AUBcommandstreamTests, testFlushTwice) {
7576
BatchBuffer batchBuffer2{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, pCS->getUsed(), pCS};
7677
ResidencyContainer allocationsForResidency2;
7778
pCommandStreamReceiver->flush(batchBuffer2, allocationsForResidency);
79+
AUBCommandStreamFixture::getAubCsr<FamilyType>()->pollForCompletionImpl();
7880
}
7981

8082
HWTEST_F(AUBcommandstreamTests, testNoopIdRcs) {

unit_tests/command_stream/aub_command_stream_receiver_2_tests.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,8 +851,9 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenAskedForMemoryExpectation
851851
uint32_t compareNotEqual = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual;
852852
uint32_t compareEqual = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual;
853853

854-
MyMockAubCsr myMockCsr(**platformDevices, std::string(), true, *pDevice->getExecutionEnvironment());
855854
auto mockStream = std::make_unique<MockAubFileStream>();
855+
MyMockAubCsr myMockCsr(**platformDevices, std::string(), true, *pDevice->getExecutionEnvironment());
856+
myMockCsr.setupContext(pDevice->getExecutionEnvironment()->commandStreamReceivers[0][0]->getOsContext());
856857
myMockCsr.stream = mockStream.get();
857858

858859
myMockCsr.expectMemoryNotEqual(mockAddress, mockAddress, 1);

0 commit comments

Comments
 (0)