Skip to content

Commit cd4f3c2

Browse files
Synchronize switching command buffers for all partitions
Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 6b062a6 commit cd4f3c2

35 files changed, with 271 additions and 112 deletions.

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) {
6161
void CommandQueueImp::reserveLinearStreamSize(size_t size) {
6262
UNRECOVERABLE_IF(commandStream == nullptr);
6363
if (commandStream->getAvailableSpace() < size) {
64-
buffers.switchBuffers(csr);
64+
buffers.switchBuffers(csr, partitionCount, addressOffset);
6565
NEO::GraphicsAllocation *nextBufferAllocation = buffers.getCurrentBufferAllocation();
6666
commandStream->replaceBuffer(nextBufferAllocation->getUnderlyingBuffer(),
6767
defaultQueueCmdBufferSize);
@@ -87,7 +87,7 @@ void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &
8787
ze_result_t CommandQueueImp::synchronize(uint64_t timeout) {
8888
if ((timeout == std::numeric_limits<uint64_t>::max()) && useKmdWaitFunction) {
8989
auto &waitPair = buffers.getCurrentFlushStamp();
90-
csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, false);
90+
csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, false, partitionCount, addressOffset);
9191
postSyncOperations();
9292
return ZE_RESULT_SUCCESS;
9393
} else {
@@ -106,19 +106,15 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
106106
timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
107107
}
108108

109+
bool ready = false;
109110
if (partitionCount > 1) {
110-
volatile uint32_t *pollAddress = csr->getTagAddress();
111-
for (uint32_t i = 0; i < partitionCount; i++) {
112-
csr->waitForCompletionWithTimeout(pollAddress, enableTimeout, timeoutMicroseconds, this->taskCount);
113-
pollAddress += addressOffsetDwords;
114-
}
111+
ready = csr->waitForCompletionWithTimeout(csr->getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait, partitionCount, addressOffset);
115112
} else {
116-
csr->waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, this->taskCount);
117-
if (*csr->getTagAddress() < taskCountToWait) {
118-
return ZE_RESULT_NOT_READY;
119-
}
113+
ready = csr->waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
114+
}
115+
if (!ready) {
116+
return ZE_RESULT_NOT_READY;
120117
}
121-
122118
postSyncOperations();
123119

124120
return ZE_RESULT_SUCCESS;
@@ -201,7 +197,7 @@ void CommandQueueImp::CommandBufferManager::destroy(NEO::MemoryManager *memoryMa
201197
}
202198
}
203199

204-
void CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr) {
200+
void CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr, uint32_t partitionCount, uint32_t offsetSize) {
205201
if (bufferUse == BUFFER_ALLOCATION::FIRST) {
206202
bufferUse = BUFFER_ALLOCATION::SECOND;
207203
} else {
@@ -211,7 +207,7 @@ void CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamRece
211207
auto completionId = flushId[bufferUse];
212208
if (completionId.second != 0u) {
213209
UNRECOVERABLE_IF(csr == nullptr);
214-
csr->waitForTaskCountWithKmdNotifyFallback(completionId.first, completionId.second, false, false);
210+
csr->waitForTaskCountWithKmdNotifyFallback(completionId.first, completionId.second, false, false, partitionCount, offsetSize);
215211
}
216212
}
217213

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ struct CommandQueueImp : public CommandQueue {
3737

3838
ze_result_t initialize(Device *device, size_t sizeRequested);
3939
void destroy(NEO::MemoryManager *memoryManager);
40-
void switchBuffers(NEO::CommandStreamReceiver *csr);
40+
void switchBuffers(NEO::CommandStreamReceiver *csr, uint32_t partitionCount, uint32_t offsetSize);
4141

4242
NEO::GraphicsAllocation *getCurrentBufferAllocation() {
4343
return buffers[bufferUse];

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp

Lines changed: 83 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1529,8 +1529,10 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
15291529
tagAddress = new uint32_t;
15301530
}
15311531

1532-
bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
1532+
bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize) override {
15331533
enableTimeoutSet = enableTimeout;
1534+
partitionCountSet = partitionCount;
1535+
offsetSizeSet = offsetSize;
15341536
waitForComplitionCalledTimes++;
15351537
return true;
15361538
}
@@ -1541,9 +1543,9 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
15411543
return true;
15421544
}
15431545

1544-
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
1546+
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override {
15451547
waitForTaskCountWithKmdNotifyFallbackCalled++;
1546-
NEO::UltCommandStreamReceiver<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode);
1548+
NEO::UltCommandStreamReceiver<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
15471549
}
15481550

15491551
volatile uint32_t *getTagAddress() const override {
@@ -1553,6 +1555,8 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
15531555
uint32_t *tagAddress;
15541556
uint32_t waitForComplitionCalledTimes = 0;
15551557
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
1558+
uint32_t partitionCountSet = 0;
1559+
uint32_t offsetSizeSet = 0;
15561560
bool enableTimeoutSet = false;
15571561
};
15581562

@@ -1650,11 +1654,86 @@ HWTEST_F(CommandQueueSynchronizeTest, givenMultiplePartitionCountWhenCallingSync
16501654
uint64_t timeout = std::numeric_limits<uint64_t>::max();
16511655
commandQueue->synchronize(timeout);
16521656

1653-
EXPECT_EQ(2u, csr->waitForComplitionCalledTimes);
1657+
EXPECT_EQ(1u, csr->waitForComplitionCalledTimes);
1658+
EXPECT_EQ(2u, csr->partitionCountSet);
1659+
EXPECT_EQ(8u, csr->offsetSizeSet);
16541660

16551661
L0::CommandQueue::fromHandle(commandQueue)->destroy();
16561662
}
16571663

1664+
template <typename GfxFamily>
1665+
struct TestCmdQueueCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
1666+
TestCmdQueueCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
1667+
: NEO::UltCommandStreamReceiver<GfxFamily>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
1668+
}
1669+
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
1670+
MOCK_METHOD6(waitForCompletionWithTimeout, bool(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize));
1671+
};
1672+
1673+
HWTEST_F(CommandQueueSynchronizeTest, givenSinglePartitionCountWhenWaitFunctionFailsThenReturnNotReady) {
1674+
auto csr = std::unique_ptr<TestCmdQueueCsr<FamilyType>>(new TestCmdQueueCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
1675+
device->getNEODevice()->getDeviceBitfield()));
1676+
csr->setupContext(*device->getNEODevice()->getDefaultEngine().osContext);
1677+
1678+
const ze_command_queue_desc_t desc{};
1679+
ze_result_t returnValue;
1680+
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
1681+
device,
1682+
csr.get(),
1683+
&desc,
1684+
false,
1685+
false,
1686+
returnValue));
1687+
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
1688+
ASSERT_NE(nullptr, commandQueue);
1689+
1690+
EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_,
1691+
::testing::_,
1692+
::testing::_))
1693+
.Times(1)
1694+
.WillOnce(::testing::Return(false));
1695+
1696+
uint64_t timeout = std::numeric_limits<uint64_t>::max();
1697+
returnValue = commandQueue->synchronize(timeout);
1698+
EXPECT_EQ(returnValue, ZE_RESULT_NOT_READY);
1699+
1700+
commandQueue->destroy();
1701+
}
1702+
1703+
HWTEST_F(CommandQueueSynchronizeTest, givenMultiplePartitionCountWhenWaitFunctionFailsThenReturnNotReady) {
1704+
auto csr = std::unique_ptr<TestCmdQueueCsr<FamilyType>>(new TestCmdQueueCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
1705+
device->getNEODevice()->getDeviceBitfield()));
1706+
csr->setupContext(*device->getNEODevice()->getDefaultEngine().osContext);
1707+
1708+
const ze_command_queue_desc_t desc{};
1709+
ze_result_t returnValue;
1710+
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
1711+
device,
1712+
csr.get(),
1713+
&desc,
1714+
false,
1715+
false,
1716+
returnValue));
1717+
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
1718+
ASSERT_NE(nullptr, commandQueue);
1719+
1720+
EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_,
1721+
::testing::_,
1722+
::testing::_,
1723+
::testing::_,
1724+
::testing::_,
1725+
::testing::_))
1726+
.Times(1)
1727+
.WillOnce(::testing::Return(false));
1728+
1729+
commandQueue->partitionCount = 2;
1730+
uint64_t timeout = std::numeric_limits<uint64_t>::max();
1731+
returnValue = commandQueue->synchronize(timeout);
1732+
EXPECT_EQ(returnValue, ZE_RESULT_NOT_READY);
1733+
1734+
commandQueue->destroy();
1735+
}
1736+
16581737
struct MemoryManagerCommandQueueCreateNegativeTest : public NEO::MockMemoryManager {
16591738
MemoryManagerCommandQueueCreateNegativeTest(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment)) {}
16601739
NEO::GraphicsAllocation *allocateGraphicsMemoryWithProperties(const NEO::AllocationProperties &properties) override {

opencl/source/command_queue/command_queue.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -198,16 +198,20 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs
198198

199199
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
200200

201-
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait,
202-
useQuickKmdSleep, forcePowerSavingMode);
201+
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
202+
flushStampToWait,
203+
useQuickKmdSleep,
204+
forcePowerSavingMode,
205+
1u,
206+
0u);
203207
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
204208

205209
if (gtpinIsGTPinInitialized()) {
206210
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
207211
}
208212

209213
if (auto bcsCsr = getBcsCommandStreamReceiver()) {
210-
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false);
214+
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false, 1u, 0u);
211215
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait);
212216
}
213217

opencl/source/command_stream/aub_command_stream_receiver_hw.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
6363
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
6464
void pollForCompletion() override;
6565
void pollForCompletionImpl() override;
66-
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
66+
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
6767

6868
uint32_t getDumpHandle();
6969
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);

opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -599,8 +599,8 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
599599
}
600600

601601
template <typename GfxFamily>
602-
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
603-
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
602+
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
603+
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
604604
pollForCompletion();
605605
}
606606

opencl/source/command_stream/command_stream_receiver_with_aub_dump.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2020 Intel Corporation
2+
* Copyright (C) 2018-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -40,7 +40,8 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
4040
}
4141

4242
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
43-
bool useQuickKmdSleep, bool forcePowerSavingMode) override;
43+
bool useQuickKmdSleep, bool forcePowerSavingMode,
44+
uint32_t partitionCount, uint32_t offsetSize) override;
4445

4546
size_t getPreferredTagPoolSize() const override { return 1; }
4647

opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,12 +71,13 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
7171

7272
template <typename BaseCSR>
7373
void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
74-
bool useQuickKmdSleep, bool forcePowerSavingMode) {
74+
bool useQuickKmdSleep, bool forcePowerSavingMode,
75+
uint32_t partitionCount, uint32_t offsetSize) {
7576
if (aubCSR) {
76-
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
77+
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
7778
}
7879

79-
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
80+
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
8081
}
8182

8283
template <typename BaseCSR>

opencl/source/os_interface/linux/drm_command_stream.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
4747
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
4848
MOCKABLE_VIRTUAL void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
4949
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
50-
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
50+
bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) override;
5151
bool isKmdWaitModeActive() override;
5252

5353
DrmMemoryManager *getMemoryManager() const;
@@ -66,7 +66,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
6666
protected:
6767
MOCKABLE_VIRTUAL void flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency);
6868
MOCKABLE_VIRTUAL void exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId);
69-
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue);
69+
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize);
7070
bool isUserFenceWaitActive();
7171

7272
std::vector<BufferObject *> residency;

opencl/source/os_interface/linux/drm_command_stream.inl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -218,10 +218,10 @@ GmmPageTableMngr *DrmCommandStreamReceiver<GfxFamily>::createPageTableManager()
218218
}
219219

220220
template <typename GfxFamily>
221-
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp) {
221+
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp, uint32_t partitionCount, uint32_t offsetSize) {
222222
auto waitValue = static_cast<uint32_t>(flushStamp);
223223
if (isUserFenceWaitActive()) {
224-
waitUserFence(waitValue);
224+
waitUserFence(waitValue, partitionCount, offsetSize);
225225
} else {
226226
this->drm->waitHandle(waitValue, kmdWaitTimeout);
227227
}

0 commit comments

Comments
 (0)