Skip to content

Commit 64fbfb2

Browse files
Improve iterating over existing CommandStreamReceivers
Change-Id: I12a10852d43c625ec5521ae91918fcb12e1a6aec
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent d99e833 commit 64fbfb2

21 files changed, with 114 additions and 108 deletions.

runtime/device/device.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,12 @@ bool Device::createEngines(const HardwareInfo *pHwInfo) {
173173
}
174174
executionEnvironment->initializeMemoryManager(getEnabled64kbPages(), enableLocalMemory, getDeviceIndex(), deviceCsrIndex);
175175

176-
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(gpgpuEngines[deviceCsrIndex], 1, preemptionMode);
177176
auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[getDeviceIndex()][deviceCsrIndex].get();
177+
178+
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver, gpgpuEngines[deviceCsrIndex],
179+
1, preemptionMode);
178180
commandStreamReceiver->setupContext(*osContext);
181+
179182
if (!commandStreamReceiver->initializeTagAllocation()) {
180183
return false;
181184
}

runtime/memory_manager/deferrable_allocation_deletion.cpp

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77

88
#include "runtime/command_stream/command_stream_receiver.h"
9+
#include "runtime/helpers/engine_control.h"
910
#include "runtime/memory_manager/deferrable_allocation_deletion.h"
1011
#include "runtime/memory_manager/memory_manager.h"
1112
#include "runtime/os_interface/os_context.h"
@@ -16,17 +17,14 @@ DeferrableAllocationDeletion::DeferrableAllocationDeletion(MemoryManager &memory
1617
graphicsAllocation(graphicsAllocation) {}
1718
bool DeferrableAllocationDeletion::apply() {
1819
if (graphicsAllocation.isUsed()) {
19-
20-
for (auto &deviceCsrs : memoryManager.getCommandStreamReceivers()) {
21-
for (auto &csr : deviceCsrs) {
22-
auto contextId = csr->getOsContext().getContextId();
23-
if (graphicsAllocation.isUsedByOsContext(contextId)) {
24-
auto currentContextTaskCount = *csr->getTagAddress();
25-
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
26-
graphicsAllocation.releaseUsageInOsContext(contextId);
27-
} else {
28-
csr->flushBatchedSubmissions();
29-
}
20+
for (auto &engine : memoryManager.getRegisteredEngines()) {
21+
auto contextId = engine.osContext->getContextId();
22+
if (graphicsAllocation.isUsedByOsContext(contextId)) {
23+
auto currentContextTaskCount = *engine.commandStreamReceiver->getTagAddress();
24+
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
25+
graphicsAllocation.releaseUsageInOsContext(contextId);
26+
} else {
27+
engine.commandStreamReceiver->flushBatchedSubmissions();
3028
}
3129
}
3230
}

runtime/memory_manager/memory_manager.cpp

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,14 @@
2929
#include <algorithm>
3030

3131
namespace OCLRT {
32-
MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
33-
ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
34-
localMemorySupported(enableLocalMemory),
35-
executionEnvironment(executionEnvironment),
36-
hostPtrManager(std::make_unique<HostPtrManager>()),
37-
multiContextResourceDestructor(std::make_unique<DeferredDeleter>()) {
38-
registeredOsContexts.resize(1);
39-
};
32+
MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory, ExecutionEnvironment &executionEnvironment)
33+
: allocator32Bit(nullptr), enable64kbpages(enable64kbpages), localMemorySupported(enableLocalMemory),
34+
executionEnvironment(executionEnvironment), hostPtrManager(std::make_unique<HostPtrManager>()),
35+
multiContextResourceDestructor(std::make_unique<DeferredDeleter>()){};
4036

4137
MemoryManager::~MemoryManager() {
42-
for (auto osContext : registeredOsContexts) {
43-
if (osContext) {
44-
osContext->decRefInternal();
45-
}
38+
for (auto &engine : registeredEngines) {
39+
engine.osContext->decRefInternal();
4640
}
4741
}
4842

@@ -151,15 +145,14 @@ void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocatio
151145
multiContextResourceDestructor->drain(false);
152146
return;
153147
}
154-
for (auto &deviceCsrs : getCommandStreamReceivers()) {
155-
for (auto &csr : deviceCsrs) {
156-
auto osContextId = csr->getOsContext().getContextId();
157-
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
158-
if (gfxAllocation->isUsedByOsContext(osContextId) &&
159-
allocationTaskCount > *csr->getTagAddress()) {
160-
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
161-
return;
162-
}
148+
for (auto &engine : getRegisteredEngines()) {
149+
auto osContextId = engine.osContext->getContextId();
150+
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
151+
if (gfxAllocation->isUsedByOsContext(osContextId) &&
152+
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
153+
engine.commandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation),
154+
TEMPORARY_ALLOCATION);
155+
return;
163156
}
164157
}
165158
}
@@ -184,14 +177,13 @@ bool MemoryManager::isMemoryBudgetExhausted() const {
184177
return false;
185178
}
186179

187-
OsContext *MemoryManager::createAndRegisterOsContext(EngineInstanceT engineType, uint32_t numSupportedDevices, PreemptionMode preemptionMode) {
180+
OsContext *MemoryManager::createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, EngineInstanceT engineType,
181+
uint32_t numSupportedDevices, PreemptionMode preemptionMode) {
188182
auto contextId = ++latestContextId;
189-
if (contextId + 1 > registeredOsContexts.size()) {
190-
registeredOsContexts.resize(contextId + 1);
191-
}
192183
auto osContext = new OsContext(executionEnvironment.osInterface.get(), contextId, numSupportedDevices, engineType, preemptionMode);
193184
osContext->incRefInternal();
194-
registeredOsContexts[contextId] = osContext;
185+
186+
registeredEngines.emplace_back(commandStreamReceiver, osContext);
195187

196188
return osContext;
197189
}
@@ -354,8 +346,8 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemoryForImage(const Allocati
354346
return allocateGraphicsMemoryForImageImpl(allocationDataWithSize, std::move(gmm));
355347
}
356348

357-
const CsrContainer &MemoryManager::getCommandStreamReceivers() const {
358-
return executionEnvironment.commandStreamReceivers;
349+
EngineControlContainer &MemoryManager::getRegisteredEngines() {
350+
return registeredEngines;
359351
}
360352

361353
CommandStreamReceiver *MemoryManager::getDefaultCommandStreamReceiver(uint32_t deviceId) const {

runtime/memory_manager/memory_manager.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "public/cl_ext_private.h"
1010
#include "runtime/command_stream/preemption_mode.h"
1111
#include "runtime/helpers/aligned_memory.h"
12+
#include "runtime/helpers/engine_control.h"
1213
#include "runtime/memory_manager/graphics_allocation.h"
1314
#include "runtime/memory_manager/host_ptr_defines.h"
1415
#include "runtime/os_interface/32bit_memory.h"
@@ -29,6 +30,7 @@ class OsContext;
2930
struct ImageInfo;
3031

3132
using CsrContainer = std::vector<std::vector<std::unique_ptr<CommandStreamReceiver>>>;
33+
using EngineControlContainer = std::vector<EngineControl>;
3234

3335
enum AllocationUsage {
3436
TEMPORARY_ALLOCATION,
@@ -177,10 +179,11 @@ class MemoryManager {
177179
::alignedFree(ptr);
178180
}
179181

180-
OsContext *createAndRegisterOsContext(EngineInstanceT engineType, uint32_t numSupportedDevices, PreemptionMode preemptionMode);
181-
uint32_t getOsContextCount() { return static_cast<uint32_t>(registeredOsContexts.size()); }
182+
OsContext *createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, EngineInstanceT engineType,
183+
uint32_t numSupportedDevices, PreemptionMode preemptionMode);
184+
uint32_t getRegisteredEnginesCount() { return static_cast<uint32_t>(registeredEngines.size()); }
182185
CommandStreamReceiver *getDefaultCommandStreamReceiver(uint32_t deviceId) const;
183-
const CsrContainer &getCommandStreamReceivers() const;
186+
EngineControlContainer &getRegisteredEngines();
184187
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
185188
void setDefaultEngineIndex(uint32_t index) { defaultEngineIndex = index; }
186189

@@ -258,7 +261,7 @@ class MemoryManager {
258261
bool enable64kbpages = false;
259262
bool localMemorySupported = false;
260263
ExecutionEnvironment &executionEnvironment;
261-
std::vector<OsContext *> registeredOsContexts;
264+
EngineControlContainer registeredEngines;
262265
std::unique_ptr<HostPtrManager> hostPtrManager;
263266
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
264267
uint32_t defaultEngineIndex = 0;

runtime/os_interface/windows/wddm_memory_manager.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -294,12 +294,10 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation
294294
WddmAllocation *input = static_cast<WddmAllocation *>(gfxAllocation);
295295
DEBUG_BREAK_IF(!validateAllocation(input));
296296

297-
for (auto &osContext : this->registeredOsContexts) {
298-
if (osContext) {
299-
auto &residencyController = osContext->get()->getResidencyController();
300-
auto lock = residencyController.acquireLock();
301-
residencyController.removeFromTrimCandidateListIfUsed(input, true);
302-
}
297+
for (auto &engine : this->registeredEngines) {
298+
auto &residencyController = engine.osContext->get()->getResidencyController();
299+
auto lock = residencyController.acquireLock();
300+
residencyController.removeFromTrimCandidateListIfUsed(input, true);
303301
}
304302

305303
DEBUG_BREAK_IF(DebugManager.flags.CreateMultipleDevices.get() == 0 &&
@@ -348,8 +346,8 @@ bool WddmMemoryManager::tryDeferDeletions(D3DKMT_HANDLE *handles, uint32_t alloc
348346
}
349347

350348
bool WddmMemoryManager::isMemoryBudgetExhausted() const {
351-
for (auto osContext : this->registeredOsContexts) {
352-
if (osContext != nullptr && osContext->get()->getResidencyController().isMemoryBudgetExhausted()) {
349+
for (auto &engine : this->registeredEngines) {
350+
if (engine.osContext->get()->getResidencyController().isMemoryBudgetExhausted()) {
353351
return true;
354352
}
355353
}

runtime/os_interface/windows/wddm_memory_manager.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ class WddmMemoryManager : public MemoryManager {
4545
AllocationStatus populateOsHandles(OsHandleStorage &handleStorage) override;
4646
void cleanOsHandles(OsHandleStorage &handleStorage) override;
4747

48-
OsContext *getRegisteredOsContext(uint32_t osContextId) { return registeredOsContexts[osContextId]; }
49-
5048
void obtainGpuAddressFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage);
5149

5250
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) override;

unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,10 @@ struct CommandStreamReceiverWithAubDumpTest : public ::testing::TestWithParam<bo
109109
executionEnvironment.memoryManager.reset(memoryManager);
110110
ASSERT_NE(nullptr, memoryManager);
111111

112-
auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(
113-
getChosenEngineType(DEFAULT_TEST_PLATFORM::hwInfo), 1, PreemptionHelper::getDefaultPreemptionMode(DEFAULT_TEST_PLATFORM::hwInfo));
112+
auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csrWithAubDump,
113+
getChosenEngineType(DEFAULT_TEST_PLATFORM::hwInfo),
114+
1, PreemptionHelper::getDefaultPreemptionMode(DEFAULT_TEST_PLATFORM::hwInfo));
114115
csrWithAubDump->setupContext(*osContext);
115-
if (csrWithAubDump->aubCSR) {
116-
csrWithAubDump->aubCSR->setupContext(*osContext);
117-
}
118116
}
119117

120118
void TearDown() override {

unit_tests/device/device_tests.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ TEST(DeviceCreation, givenDefaultHwCsrInDebugVarsWhenDeviceIsCreatedThenIsSimula
152152
TEST(DeviceCreation, givenDeviceWhenItIsCreatedThenOsContextIsRegistredInMemoryManager) {
153153
auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<Device>(nullptr));
154154
auto memoryManager = device->getMemoryManager();
155-
EXPECT_EQ(HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances().size(), memoryManager->getOsContextCount());
155+
EXPECT_EQ(HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances().size(), memoryManager->getRegisteredEnginesCount());
156156
}
157157

158158
TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachOsContextHasUniqueId) {
@@ -164,11 +164,19 @@ TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachOsContextHasUniqu
164164
auto device1 = std::unique_ptr<Device>(Device::create<Device>(nullptr, &executionEnvironment, 0u));
165165
auto device2 = std::unique_ptr<Device>(Device::create<Device>(nullptr, &executionEnvironment, 1u));
166166

167+
auto &registeredEngines = executionEnvironment.memoryManager->getRegisteredEngines();
168+
EXPECT_EQ(numGpgpuEngines * numDevices, registeredEngines.size());
169+
167170
for (uint32_t i = 0; i < numGpgpuEngines; i++) {
168171
EXPECT_EQ(i, device1->getEngine(i).osContext->getContextId());
169172
EXPECT_EQ(i + numGpgpuEngines, device2->getEngine(i).osContext->getContextId());
173+
174+
EXPECT_EQ(registeredEngines[i].commandStreamReceiver,
175+
device1->getEngine(i).commandStreamReceiver);
176+
EXPECT_EQ(registeredEngines[i + numGpgpuEngines].commandStreamReceiver,
177+
device2->getEngine(i).commandStreamReceiver);
170178
}
171-
EXPECT_EQ(numGpgpuEngines * numDevices, executionEnvironment.memoryManager->getOsContextCount());
179+
EXPECT_EQ(numGpgpuEngines * numDevices, executionEnvironment.memoryManager->getRegisteredEnginesCount());
172180
}
173181

174182
TEST(DeviceCreation, givenMultiDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateDeviceIndex) {

unit_tests/fixtures/memory_allocator_fixture.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ class MemoryAllocatorFixture : public MemoryManagementFixture {
2626
memoryManager = new MockMemoryManager(false, false, *executionEnvironment);
2727
executionEnvironment->memoryManager.reset(memoryManager);
2828
csr = memoryManager->getDefaultCommandStreamReceiver(0);
29-
csr->setupContext(*memoryManager->createAndRegisterOsContext(HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances()[0], 1, PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0])));
29+
auto engineType = HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances()[0];
30+
auto osContext = memoryManager->createAndRegisterOsContext(csr, engineType, 1, PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]));
31+
csr->setupContext(*osContext);
3032
}
3133

3234
void TearDown() override {

unit_tests/fixtures/memory_manager_fixture.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ void MemoryManagerWithCsrFixture::SetUp() {
2121
csr->tagAddress = &currentGpuTag;
2222
executionEnvironment.commandStreamReceivers.resize(1);
2323
executionEnvironment.commandStreamReceivers[0].push_back(std::unique_ptr<CommandStreamReceiver>(csr));
24-
csr->setupContext(*memoryManager->createAndRegisterOsContext(HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances()[0], 1, PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0])));
24+
auto engine = HwHelper::get(platformDevices[0]->pPlatform->eRenderCoreFamily).getGpgpuEngineInstances()[0];
25+
auto osContext = memoryManager->createAndRegisterOsContext(csr, engine, 1, PreemptionHelper::getDefaultPreemptionMode(*platformDevices[0]));
26+
csr->setupContext(*osContext);
2527
}
2628

2729
void MemoryManagerWithCsrFixture::TearDown() {

0 commit comments

Comments
 (0)