Skip to content

Commit 82e29fd

Browse files
Add bcs split control mask
Introduce a debug variable to control which engines the transfer will be split into. Related-To: NEO-7173 Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent 908ddd0 commit 82e29fd

File tree

7 files changed

+65
-17
lines changed

7 files changed

+65
-17
lines changed

opencl/source/command_queue/command_queue.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -305,21 +305,28 @@ void CommandQueue::initializeBcsEngine(bool internalUsage) {
305305
}
306306

307307
void CommandQueue::constructBcsEnginesForSplit() {
308-
if (!this->bcsSplitInitialized) {
309-
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
310-
auto index = EngineHelpers::getBcsIndex(static_cast<aub_stream::EngineType>(i));
311-
if (!bcsEngines[index]) {
312-
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
313-
bcsEngines[index] = neoDevice.tryGetEngine(static_cast<aub_stream::EngineType>(i), EngineUsage::Regular);
314-
bcsEngineTypes.push_back(static_cast<aub_stream::EngineType>(i));
315-
if (bcsEngines[index]) {
316-
bcsEngines[index]->osContext->ensureContextInitialized();
317-
bcsEngines[index]->commandStreamReceiver->initDirectSubmission();
318-
}
308+
if (this->bcsSplitInitialized) {
309+
return;
310+
}
311+
312+
if (DebugManager.flags.SplitBcsMask.get() > 0) {
313+
this->splitEngines = DebugManager.flags.SplitBcsMask.get();
314+
}
315+
316+
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
317+
if (this->splitEngines.test(i) && !bcsEngines[i]) {
318+
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
319+
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
320+
bcsEngines[i] = neoDevice.tryGetEngine(engineType, EngineUsage::Regular);
321+
bcsEngineTypes.push_back(engineType);
322+
if (bcsEngines[i]) {
323+
bcsEngines[i]->osContext->ensureContextInitialized();
324+
bcsEngines[i]->commandStreamReceiver->initDirectSubmission();
319325
}
320326
}
321-
this->bcsSplitInitialized = true;
322327
}
328+
329+
this->bcsSplitInitialized = true;
323330
}
324331

325332
void CommandQueue::prepareHostPtrSurfaceForSplit(bool split, GraphicsAllocation &allocation) {

opencl/source/command_queue/command_queue.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
420420
bool isCopyOnly = false;
421421
bool bcsAllowed = false;
422422
bool bcsInitialized = false;
423+
423424
bool bcsSplitInitialized = false;
425+
BcsInfoMask splitEngines = EngineHelpers::evenLinkedCopyEnginesMask;
424426

425427
LinearStream *commandStream = nullptr;
426428

opencl/source/command_queue/enqueue_common.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,13 +1136,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
11361136

11371137
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 3u> locks;
11381138
StackVec<CommandStreamReceiver *, 3u> copyEngines;
1139-
for (auto i = static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS2); i <= static_cast<uint32_t>(aub_stream::EngineType::ENGINE_BCS8); i += 2) {
1140-
auto bcs = getBcsCommandStreamReceiver(static_cast<aub_stream::EngineType>(i));
1141-
if (bcs) {
1142-
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
1143-
copyEngines.push_back(bcs);
1139+
1140+
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
1141+
if (this->splitEngines.test(i)) {
1142+
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
1143+
auto bcs = getBcsCommandStreamReceiver(engineType);
1144+
if (bcs) {
1145+
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
1146+
copyEngines.push_back(bcs);
1147+
}
11441148
}
11451149
}
1150+
11461151
DEBUG_BREAK_IF(copyEngines.size() == 0);
11471152
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
11481153

opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,37 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, whenConstructBcsEnginesForSplitThenConta
129129
EXPECT_EQ(4u, queue->countBcsEngines());
130130
}
131131

132+
HWTEST2_F(CommandQueuePvcAndLaterTests, givenSplitBcsMaskWhenConstructBcsEnginesForSplitThenContainsGivenBcsEngines, IsAtLeastXeHpcCore) {
133+
DebugManagerStateRestore restorer;
134+
std::bitset<bcsInfoMaskSize> bcsMask = 0b100110101;
135+
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
136+
DebugManager.flags.SplitBcsMask.set(static_cast<int>(bcsMask.to_ulong()));
137+
HardwareInfo hwInfo = *defaultHwInfo;
138+
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
139+
hwInfo.capabilityTable.blitterOperationsSupported = true;
140+
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
141+
MockClDevice clDevice{device};
142+
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
143+
ClDeviceVector clDevices{&clDeviceId, 1u};
144+
cl_int retVal{};
145+
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
146+
EXPECT_EQ(CL_SUCCESS, retVal);
147+
auto queue = std::make_unique<MockCommandQueue>(*context);
148+
EXPECT_EQ(0u, queue->countBcsEngines());
149+
150+
queue->constructBcsEnginesForSplit();
151+
152+
EXPECT_EQ(5u, queue->countBcsEngines());
153+
154+
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
155+
if (bcsMask.test(i)) {
156+
EXPECT_NE(queue->bcsEngines[i], nullptr);
157+
} else {
158+
EXPECT_EQ(queue->bcsEngines[i], nullptr);
159+
}
160+
}
161+
}
162+
132163
HWTEST2_F(CommandQueuePvcAndLaterTests, whenSelectCsrForHostPtrAllocationThenReturnProperEngine, IsAtLeastXeHpcCore) {
133164
DebugManagerStateRestore restorer;
134165
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default,
288288
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
289289
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal BCS engine for internal transfers, when disabled use regular engine")
290290
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsCopy, -1, "-1: default, 0:disabled, 1: enabled. When enqueues copy to main copy engine then split between even linked copy engines")
291+
DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMask, 0, "0: default, >0: bitmask: indicates bcs engines for split")
291292
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
292293

293294
/*DIRECT SUBMISSION FLAGS*/

shared/source/helpers/engine_node_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ constexpr bool isLinkBcs(aub_stream::EngineType engineType) {
6565
}
6666

6767
constexpr uint32_t numLinkedCopyEngines = 8u;
68+
constexpr size_t evenLinkedCopyEnginesMask = 0b101010100;
6869

6970
bool linkCopyEnginesSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield);
7071

shared/test/common/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ AssignBCSAtEnqueue = -1
394394
DeferCmdQGpgpuInitialization = -1
395395
DeferCmdQBcsInitialization = -1
396396
SplitBcsCopy = -1
397+
SplitBcsMask = 0
397398
PreferInternalBcsEngine = -1
398399
ReuseKernelBinaries = -1
399400
EnableChipsetUniqueUUID = -1

0 commit comments

Comments
 (0)