Skip to content

Commit f7d105f

Browse files
Create work partition allocation
Related-To: NEO-5546 Resolves: NEO-5561 Signed-off-by: Maciej Dziuban <[email protected]>
1 parent 1844875 commit f7d105f

File tree

17 files changed

+185
-4
lines changed

17 files changed

+185
-4
lines changed

opencl/source/utilities/logger.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -335,6 +335,8 @@ const char *FileLogger<DebugLevel>::getAllocationTypeString(GraphicsAllocation c
335335
return "DEBUG_SBA_TRACKING_BUFFER";
336336
case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA:
337337
return "DEBUG_MODULE_AREA";
338+
case GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE:
339+
return "WORK_PARTITION_SURFACE";
338340
default:
339341
return "ILLEGAL_VALUE";
340342
}

opencl/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -723,6 +723,7 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenWriteMe
723723
GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL,
724724
GraphicsAllocation::AllocationType::PRIVATE_SURFACE,
725725
GraphicsAllocation::AllocationType::SCRATCH_SURFACE,
726+
GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE,
726727
GraphicsAllocation::AllocationType::BUFFER,
727728
GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY,
728729
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,

opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp

Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
#include "shared/source/utilities/tag_allocator.h"
2222
#include "shared/test/common/helpers/debug_manager_state_restore.h"
2323
#include "shared/test/common/mocks/mock_graphics_allocation.h"
24+
#include "shared/test/common/mocks/ult_device_factory.h"
25+
#include "shared/test/common/test_macros/test_checks_shared.h"
2426

2527
#include "opencl/source/mem_obj/buffer.h"
2628
#include "opencl/source/platform/platform.h"
@@ -1231,6 +1233,78 @@ HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtSecond
12311233
EXPECT_EQ(1u, confirmationCounter);
12321234
}
12331235

1236+
// Verifies how a freshly constructed CSR reflects the EnableStaticPartitioning debug flag:
// leaving the flag untouched or setting it to 0 keeps staticWorkPartitioningEnabled off,
// setting it to 1 turns it on.
HWTEST_F(CommandStreamReceiverTest, givenDebugFlagWhenCreatingCsrThenSetEnableStaticPartitioningAccordingly) {
    DebugManagerStateRestore stateRestore{};

    // Builds a throwaway device and reports the setting its CSR was constructed with.
    const auto partitioningEnabledOnFreshDevice = []() {
        MockDevice freshDevice{};
        return freshDevice.getUltCommandStreamReceiver<FamilyType>().staticWorkPartitioningEnabled;
    };

    // Flag not touched yet.
    EXPECT_FALSE(partitioningEnabledOnFreshDevice());

    DebugManager.flags.EnableStaticPartitioning.set(0);
    EXPECT_FALSE(partitioningEnabledOnFreshDevice());

    DebugManager.flags.EnableStaticPartitioning.set(1);
    EXPECT_TRUE(partitioningEnabledOnFreshDevice());
}
1253+
1254+
// Verifies that creating the work partition allocation on a root device with several
// sub-devices pushes commands into the copy-engine (BCS) command stream of every sub-device.
HWTEST_F(CommandStreamReceiverTest, whenCreatingWorkPartitionAllocationThenInitializeContentsWithCopyEngine) {
    REQUIRE_BLITTER_OR_SKIP(defaultHwInfo.get());
    DebugManagerStateRestore restore{};
    // Flag off while the devices are built; the allocation is requested explicitly below.
    DebugManager.flags.EnableStaticPartitioning.set(0);

    constexpr uint32_t subDeviceCount = 3;
    UltDeviceFactory deviceFactory{1, subDeviceCount};
    MockDevice &rootDevice = *deviceFactory.rootDevices[0];
    rootDevice.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
    rootDevice.getRootDeviceEnvironment().getMutableHardwareInfo()->featureTable.ftrBcsInfo = 1;
    UltCommandStreamReceiver<FamilyType> &csr = rootDevice.getUltCommandStreamReceiver<FamilyType>();

    // Record how much of each sub-device's BCS command stream is used before the call.
    UltCommandStreamReceiver<FamilyType> *bcsCsrs[subDeviceCount] = {};
    size_t bcsStarts[subDeviceCount] = {};
    for (uint32_t subDeviceIndex = 0; subDeviceIndex < subDeviceCount; subDeviceIndex++) {
        auto *engineCsr = rootDevice.getDeviceById(subDeviceIndex)->getEngine(aub_stream::ENGINE_BCS, false, false).commandStreamReceiver;
        // Downcast within the class hierarchy: static_cast adjusts the pointer correctly,
        // reinterpret_cast would not.
        bcsCsrs[subDeviceIndex] = static_cast<UltCommandStreamReceiver<FamilyType> *>(engineCsr);
        bcsStarts[subDeviceIndex] = bcsCsrs[subDeviceIndex]->commandStream.getUsed();
    }

    // Enable the feature directly on the CSR, independently of the debug flag.
    csr.staticWorkPartitioningEnabled = true;
    EXPECT_TRUE(csr.createWorkPartitionAllocation(rootDevice));
    EXPECT_NE(nullptr, csr.getWorkPartitionAllocation());

    // Every sub-device's BCS command stream must have grown.
    for (uint32_t subDeviceIndex = 0; subDeviceIndex < subDeviceCount; subDeviceIndex++) {
        EXPECT_LT(bcsStarts[subDeviceIndex], bcsCsrs[subDeviceIndex]->commandStream.getUsed()) << "sub-device " << subDeviceIndex;
    }
}
1284+
1285+
// Verifies that createWorkPartitionAllocation returns false and the CSR exposes no
// work partition allocation when the memory manager fails to allocate.
HWTEST_F(CommandStreamReceiverTest, givenFailingMemoryManagerWhenCreatingWorkPartitionAllocationThenReturnFalse) {
    // Memory manager whose property-based allocation entry point always fails.
    struct FailingMemoryManager : OsAgnosticMemoryManager {
        using OsAgnosticMemoryManager::OsAgnosticMemoryManager;
        GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &) override {
            return nullptr;
        }
    };

    DebugManagerStateRestore stateRestore{};
    DebugManager.flags.EnableStaticPartitioning.set(0);
    UltDeviceFactory factory{1, 2};
    MockDevice &rootDevice = *factory.rootDevices[0];
    auto &csr = rootDevice.getUltCommandStreamReceiver<FamilyType>();

    // Swap in the failing manager only after the devices have been built.
    ExecutionEnvironment &environment = *rootDevice.executionEnvironment;
    environment.memoryManager = std::make_unique<FailingMemoryManager>(environment);

    csr.staticWorkPartitioningEnabled = true;
    DebugManager.flags.EnableStaticPartitioning.set(1);
    const bool created = csr.createWorkPartitionAllocation(rootDevice);
    EXPECT_FALSE(created);
    EXPECT_EQ(nullptr, csr.getWorkPartitionAllocation());
}
1307+
12341308
// Value-parameterized fixture: reuses CommandStreamReceiverTest and adds a
// std::pair<bool, bool> gtest parameter.
// NOTE(review): the meaning of the two booleans is defined by the tests using this fixture,
// which are not visible in this hunk.
class CommandStreamReceiverWithAubSubCaptureTest : public CommandStreamReceiverTest,
12351309
public ::testing::WithParamInterface<std::pair<bool, bool>> {};
12361310

opencl/test/unit_test/device/device_tests.cpp

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -331,6 +331,48 @@ HWTEST_F(DeviceTest, givenDeviceWhenAskingForDefaultEngineThenReturnValidValue)
331331
EXPECT_FALSE(osContext->isLowPriority());
332332
}
333333

334+
// With two sub-devices, only the root device's default CSR is expected to own a work
// partition allocation, and only when EnableStaticPartitioning is set to 1.
HWTEST_F(DeviceTest, givenDebugFlagWhenCreatingRootDeviceWithSubDevicesThenWorkPartitionAllocationIsCreatedForRootDevice) {
    DebugManagerStateRestore stateRestore{};

    // Fetches the work partition allocation held by a device's default engine CSR.
    const auto workPartitionAllocationOf = [](auto &device) {
        return device.getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation();
    };

    // Flag not touched: nothing is created.
    {
        UltDeviceFactory factory{1, 2};
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.rootDevices[0]));
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.subDevices[0]));
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.subDevices[1]));
    }

    // Flag explicitly 0: nothing is created.
    DebugManager.flags.EnableStaticPartitioning.set(0);
    {
        UltDeviceFactory factory{1, 2};
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.rootDevices[0]));
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.subDevices[0]));
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.subDevices[1]));
    }

    // Flag set to 1: the root device gets the allocation, the sub-devices do not.
    DebugManager.flags.EnableStaticPartitioning.set(1);
    {
        UltDeviceFactory factory{1, 2};
        EXPECT_NE(nullptr, workPartitionAllocationOf(*factory.rootDevices[0]));
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.subDevices[0]));
        EXPECT_EQ(nullptr, workPartitionAllocationOf(*factory.subDevices[1]));
    }
}
357+
358+
// A root device built with a sub-device count of 1 must never receive a work partition
// allocation, whatever the value of EnableStaticPartitioning.
HWTEST_F(DeviceTest, givenDebugFlagWhenCreatingRootDeviceWithoutSubDevicesThenWorkPartitionAllocationIsNotCreated) {
    DebugManagerStateRestore stateRestore{};

    // Creates a single-device factory and checks that its root device has no allocation.
    const auto expectNoAllocationOnSingleDevice = []() {
        UltDeviceFactory factory{1, 1};
        EXPECT_EQ(nullptr, factory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation());
    };

    // Flag not touched.
    expectNoAllocationOnSingleDevice();

    DebugManager.flags.EnableStaticPartitioning.set(0);
    expectNoAllocationOnSingleDevice();

    DebugManager.flags.EnableStaticPartitioning.set(1);
    expectNoAllocationOnSingleDevice();
}
375+
334376
TEST(DeviceCreation, givenFtrSimulationModeFlagTrueWhenNoOtherSimulationFlagsArePresentThenIsSimulationReturnsTrue) {
335377
HardwareInfo hwInfo = *defaultHwInfo;
336378
hwInfo.featureTable.ftrSimulationMode = true;

opencl/test/unit_test/libult/ult_command_stream_receiver.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
5050
using BaseClass::requiresInstructionCacheFlush;
5151
using BaseClass::rootDeviceIndex;
5252
using BaseClass::sshState;
53+
using BaseClass::staticWorkPartitioningEnabled;
5354
using BaseClass::wasSubmittedToSingleSubdevice;
5455
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
5556
using BaseClass::CommandStreamReceiver::checkForNewResources;

opencl/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.inl

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,7 @@ static const GraphicsAllocation::AllocationType allocationTypesWith32BitAnd64KbP
212212
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
213213
GraphicsAllocation::AllocationType::PIPE,
214214
GraphicsAllocation::AllocationType::SCRATCH_SURFACE,
215+
GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE,
215216
GraphicsAllocation::AllocationType::PRIVATE_SURFACE,
216217
GraphicsAllocation::AllocationType::PRINTF_SURFACE,
217218
GraphicsAllocation::AllocationType::CONSTANT_SURFACE,
@@ -966,6 +967,7 @@ static const GraphicsAllocation::AllocationType allocationHaveToBeForcedTo48Bit[
966967
GraphicsAllocation::AllocationType::LINEAR_STREAM,
967968
GraphicsAllocation::AllocationType::MCS,
968969
GraphicsAllocation::AllocationType::SCRATCH_SURFACE,
970+
GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE,
969971
GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE,
970972
GraphicsAllocation::AllocationType::SHARED_IMAGE,
971973
GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY,

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -212,4 +212,5 @@ EnableHostPointerImport = -1
212212
EnableHostUsmSupport = -1
213213
ForceBtpPrefetchMode = -1
214214
OverrideProfilingTimerResolution = -1
215-
PreferCopyEngineForCopyBufferToBuffer = -1
215+
PreferCopyEngineForCopyBufferToBuffer = -1
216+
EnableStaticPartitioning = -1

opencl/test/unit_test/utilities/file_logger_tests.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -927,6 +927,7 @@ AllocationTypeTestCase allocationTypeValues[] = {
927927
{GraphicsAllocation::AllocationType::PRIVATE_SURFACE, "PRIVATE_SURFACE"},
928928
{GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER, "PROFILING_TAG_BUFFER"},
929929
{GraphicsAllocation::AllocationType::SCRATCH_SURFACE, "SCRATCH_SURFACE"},
930+
{GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE, "WORK_PARTITION_SURFACE"},
930931
{GraphicsAllocation::AllocationType::SHARED_BUFFER, "SHARED_BUFFER"},
931932
{GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE, "SHARED_CONTEXT_IMAGE"},
932933
{GraphicsAllocation::AllocationType::SHARED_IMAGE, "SHARED_IMAGE"},

shared/source/aub/aub_helper.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ class AubHelper : public NonCopyableOrMovableClass {
2424
case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL:
2525
case GraphicsAllocation::AllocationType::PRIVATE_SURFACE:
2626
case GraphicsAllocation::AllocationType::SCRATCH_SURFACE:
27+
case GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE:
2728
case GraphicsAllocation::AllocationType::BUFFER:
2829
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
2930
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:

shared/source/command_stream/command_stream_receiver.cpp

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,10 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
4848
indirectHeap[i] = nullptr;
4949
}
5050
internalAllocationStorage = std::make_unique<InternalAllocationStorage>(*this);
51+
52+
if (DebugManager.flags.EnableStaticPartitioning.get() == 1) {
53+
this->staticWorkPartitioningEnabled = true;
54+
}
5155
}
5256

5357
CommandStreamReceiver::~CommandStreamReceiver() {
@@ -226,6 +230,11 @@ void CommandStreamReceiver::cleanupResources() {
226230
getMemoryManager()->freeGraphicsMemory(clearColorAllocation);
227231
clearColorAllocation = nullptr;
228232
}
233+
234+
if (workPartitionAllocation) {
235+
getMemoryManager()->freeGraphicsMemory(workPartitionAllocation);
236+
workPartitionAllocation = nullptr;
237+
}
229238
}
230239

231240
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
@@ -474,6 +483,37 @@ bool CommandStreamReceiver::initializeTagAllocation() {
474483
return true;
475484
}
476485

486+
bool CommandStreamReceiver::createWorkPartitionAllocation(const Device &device) {
487+
if (!staticWorkPartitioningEnabled) {
488+
return false;
489+
}
490+
UNRECOVERABLE_IF(device.getNumAvailableDevices() < 2);
491+
492+
AllocationProperties properties{this->rootDeviceIndex, true, 4096u, GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE, true, false, deviceBitfield};
493+
this->workPartitionAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
494+
if (this->workPartitionAllocation == nullptr) {
495+
return false;
496+
}
497+
498+
for (uint32_t deviceIndex = 0; deviceIndex < deviceBitfield.size(); deviceIndex++) {
499+
if (!deviceBitfield.test(deviceIndex)) {
500+
continue;
501+
}
502+
503+
const uint32_t copySrc = deviceIndex;
504+
const Vec3<size_t> copySrcSize = {sizeof(copySrc), 1, 1};
505+
DeviceBitfield copyBitfield{};
506+
copyBitfield.set(deviceIndex);
507+
BlitOperationResult blitResult = BlitHelper::blitMemoryToAllocationBanks(device, workPartitionAllocation, 0, &copySrc, copySrcSize, copyBitfield);
508+
509+
if (blitResult != BlitOperationResult::Success) {
510+
return false;
511+
}
512+
}
513+
514+
return true;
515+
}
516+
477517
bool CommandStreamReceiver::createGlobalFenceAllocation() {
478518
auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
479519
if (!HwHelper::get(hwInfo->platform.eRenderCoreFamily).isFenceAllocationRequired(*hwInfo)) {

0 commit comments

Comments
 (0)