Skip to content

Commit 8a9ea9a

Browse files
Make migration for indirect allocations
Signed-off-by: Maciej Plewka <[email protected]>
1 parent 4f94984 commit 8a9ea9a

File tree

11 files changed: 211 additions and 53 deletions.

level_zero/core/source/cmdlist/cmdlist.cpp

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -144,19 +144,6 @@ NEO::PreemptionMode CommandList::obtainKernelPreemptionMode(Kernel *kernel) {
144144
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
145145
}
146146

147-
void CommandList::makeResidentAndMigrate(bool performMigration) {
148-
for (auto alloc : commandContainer.getResidencyContainer()) {
149-
csr->makeResident(*alloc);
150-
151-
if (performMigration &&
152-
(alloc->getAllocationType() == NEO::AllocationType::SVM_GPU ||
153-
alloc->getAllocationType() == NEO::AllocationType::SVM_CPU)) {
154-
auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
155-
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
156-
}
157-
}
158-
}
159-
160147
void CommandList::migrateSharedAllocations() {
161148
auto deviceImp = static_cast<DeviceImp *>(device);
162149
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(deviceImp->getDriverHandle());

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,6 @@ struct CommandList : _ze_command_list_handle_t {
278278
return static_cast<uint32_t>(returnPoints.size());
279279
}
280280

281-
void makeResidentAndMigrate(bool);
282281
void migrateSharedAllocations();
283282

284283
bool getSystolicModeSupport() const {

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
110110

111111
std::unique_lock<std::mutex> lockForIndirect;
112112
if (this->hasIndirectAllocationsAllowed()) {
113-
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect);
113+
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect, performMigration);
114114
}
115115

116116
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
@@ -123,7 +123,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
123123
}
124124
}
125125

126-
this->makeResidentAndMigrate(performMigration);
126+
this->cmdQImmediate->makeResidentAndMigrate(performMigration, this->commandContainer.getResidencyContainer());
127127

128128
if (performMigration) {
129129
this->migrateSharedAllocations();

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
*
66
*/
77

8+
#include "shared/source/command_container/cmdcontainer.h"
89
#include "shared/source/command_stream/command_stream_receiver.h"
910
#include "shared/source/command_stream/csr_definitions.h"
1011
#include "shared/source/command_stream/linear_stream.h"
@@ -264,7 +265,7 @@ NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::Comman
264265
return waitStatus;
265266
}
266267

267-
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect) {
268+
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) {
268269
NEO::Device *neoDevice = this->device->getNEODevice();
269270
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
270271
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
@@ -276,9 +277,23 @@ void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls un
276277
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
277278
} else {
278279
lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership();
280+
NEO::ResidencyContainer residencyAllocations;
279281
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
280-
this->csr->getResidencyAllocations(),
282+
residencyAllocations,
281283
unifiedMemoryControls.generateMask());
284+
makeResidentAndMigrate(performMigration, residencyAllocations);
285+
}
286+
}
287+
288+
void CommandQueueImp::makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) {
289+
for (auto alloc : residencyContainer) {
290+
csr->makeResident(*alloc);
291+
if (performMigration &&
292+
(alloc->getAllocationType() == NEO::AllocationType::SVM_GPU ||
293+
alloc->getAllocationType() == NEO::AllocationType::SVM_CPU)) {
294+
auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
295+
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(alloc->getGpuAddress()));
296+
}
282297
}
283298
}
284299

level_zero/core/source/cmdqueue/cmdqueue.h

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,15 @@
1313

1414
#include <atomic>
1515
#include <mutex>
16+
#include <vector>
1617

1718
struct _ze_command_queue_handle_t {};
1819

1920
namespace NEO {
2021
class CommandStreamReceiver;
21-
}
22+
class GraphicsAllocation;
23+
using ResidencyContainer = std::vector<GraphicsAllocation *>;
24+
} // namespace NEO
2225

2326
struct UnifiedMemoryControls;
2427

@@ -52,7 +55,8 @@ struct CommandQueue : _ze_command_queue_handle_t {
5255
return static_cast<CommandQueue *>(handle);
5356
}
5457

55-
virtual void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect) = 0;
58+
virtual void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) = 0;
59+
virtual void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) = 0;
5660

5761
ze_command_queue_handle_t toHandle() { return this; }
5862

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
119119

120120
std::unique_lock<std::mutex> lockForIndirect;
121121
if (ctx.hasIndirectAccess) {
122-
handleIndirectAllocationResidency(ctx.unifiedMemoryControls, lockForIndirect);
122+
handleIndirectAllocationResidency(ctx.unifiedMemoryControls, lockForIndirect, ctx.isMigrationRequested);
123123
}
124124

125125
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists);
@@ -563,7 +563,7 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
563563
}
564564

565565
this->partitionCount = std::max(this->partitionCount, commandList->partitionCount);
566-
commandList->makeResidentAndMigrate(ctx.isMigrationRequested);
566+
makeResidentAndMigrate(ctx.isMigrationRequested, commandList->commandContainer.getResidencyContainer());
567567
}
568568

569569
ctx.isDispatchTaskCountPostSyncRequired = isDispatchTaskCountPostSyncRequired(hFence, ctx.containsAnyRegularCmdList);

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,8 @@ struct CommandQueueImp : public CommandQueue {
8383
MOCKABLE_VIRTUAL NEO::WaitStatus reserveLinearStreamSize(size_t size);
8484
ze_command_queue_mode_t getSynchronousMode() const;
8585
virtual bool getPreemptionCmdProgramming() = 0;
86-
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect) override;
86+
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) override;
87+
void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override;
8788

8889
protected:
8990
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -718,7 +718,7 @@ TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) {
718718
class MockCommandQueueIndirectAccess : public Mock<CommandQueue> {
719719
public:
720720
MockCommandQueueIndirectAccess(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : Mock(device, csr, desc) {}
721-
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect) override {
721+
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) override {
722722
handleIndirectAllocationResidencyCalledTimes++;
723723
}
724724
uint32_t handleIndirectAllocationResidencyCalledTimes = 0;

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -527,6 +527,10 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
527527
commandList->device = device;
528528
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
529529
commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
530+
ze_command_queue_desc_t desc = {};
531+
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
532+
MockCommandQueueHw<gfxCoreFamily> mockCommandQueue(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc);
533+
commandList->cmdQImmediate = &mockCommandQueue;
530534

531535
ze_group_count_t groupCount = {3, 2, 1};
532536
CmdListKernelLaunchParams launchParams = {};
@@ -537,6 +541,7 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
537541

538542
auto ultCsr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
539543
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize);
544+
commandList->cmdQImmediate = nullptr;
540545
}
541546

542547
HWTEST2_F(CmdlistAppendLaunchKernelTests,
@@ -710,6 +715,10 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
710715
commandList->device = device;
711716
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
712717
commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
718+
ze_command_queue_desc_t desc = {};
719+
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
720+
MockCommandQueueHw<gfxCoreFamily> mockCommandQueue(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc);
721+
commandList->cmdQImmediate = &mockCommandQueue;
713722

714723
ze_group_count_t groupCount = {3, 2, 1};
715724
CmdListKernelLaunchParams launchParams = {};
@@ -722,6 +731,7 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
722731
auto ultCsr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
723732
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize);
724733
EXPECT_EQ(privateScratchPerThreadSize, ultCsr->requiredPrivateScratchSize);
734+
commandList->cmdQImmediate = nullptr;
725735
}
726736

727737
using MultiReturnCommandListTest = Test<MultiReturnCommandListFixture>;

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp

Lines changed: 3 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1886,7 +1886,7 @@ TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidenc
18861886
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
18871887
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
18881888

1889-
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
1889+
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock, false);
18901890
EXPECT_EQ(mockSvmAllocsManager->makeIndirectAllocationsResidentCalledTimes, 1u);
18911891
EXPECT_EQ(mockSvmAllocsManager->addInternalAllocationsToResidencyContainerCalledTimes, 0u);
18921892
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
@@ -1911,7 +1911,7 @@ TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidenc
19111911
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
19121912
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
19131913

1914-
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
1914+
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock, false);
19151915
EXPECT_EQ(mockSvmAllocsManager->makeIndirectAllocationsResidentCalledTimes, 0u);
19161916
EXPECT_EQ(mockSvmAllocsManager->addInternalAllocationsToResidencyContainerCalledTimes, 1u);
19171917
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
@@ -1937,7 +1937,7 @@ TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidenc
19371937
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
19381938
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
19391939

1940-
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
1940+
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock, false);
19411941
std::thread th([&] {
19421942
EXPECT_FALSE(mockSvmAllocsManager->mtxForIndirectAccess.try_lock());
19431943
});
@@ -1947,30 +1947,5 @@ TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidenc
19471947
commandQueue->destroy();
19481948
}
19491949

1950-
TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmiPackDisabeldThenResidencyContainerFromCsrIsUsed) {
1951-
DebugManagerStateRestore restore;
1952-
DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
1953-
const ze_command_queue_desc_t desc{};
1954-
ze_result_t returnValue;
1955-
1956-
auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
1957-
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
1958-
device,
1959-
neoDevice->getDefaultEngine().commandStreamReceiver,
1960-
&desc,
1961-
false,
1962-
false,
1963-
returnValue));
1964-
std::unique_lock<std::mutex> lock;
1965-
auto mockSvmAllocsManager = std::make_unique<SVMAllocsManagerMock>(device->getDriverHandle()->getMemoryManager());
1966-
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get();
1967-
1968-
commandQueue->handleIndirectAllocationResidency({true, true, true}, lock);
1969-
EXPECT_EQ(commandQueue->csr->getResidencyAllocations().data(), mockSvmAllocsManager->passedContainer);
1970-
reinterpret_cast<WhiteBox<::L0::DriverHandle> *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager;
1971-
lock.unlock();
1972-
commandQueue->destroy();
1973-
}
1974-
19751950
} // namespace ult
19761951
} // namespace L0

0 commit comments

Comments
 (0)