Skip to content

Commit 0466317

Browse files
refactor: change encoder for thread group over dispatch 2/n
- bind algorithms to input arguments Related-To: NEO-12639 Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 7103af5 commit 0466317

File tree

2 files changed

+15
-19
lines changed

2 files changed

+15
-19
lines changed

shared/source/command_container/command_encoder_xehp_and_later.inl

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,26 +1143,23 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
11431143
threadsPerXeCore /= 2;
11441144
}
11451145
auto tgDispatchSizeSelected = 8;
1146-
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
11471146

1148-
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
1149-
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
1147+
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
1148+
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
11501149
tgDispatchSizeSelected /= 2;
11511150
}
1152-
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
1153-
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
1151+
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
1152+
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
11541153
tgDispatchSizeSelected /= 2;
11551154
}
11561155
}
11571156

1158-
auto workgroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();
1159-
11601157
// make sure we fit all xe cores
1161-
while (workgroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
1158+
while (threadGroupCount / tgDispatchSizeSelected < hwInfo.gtSystemInfo.MaxSubSlicesSupported * tileCount && tgDispatchSizeSelected > 1) {
11621159
tgDispatchSizeSelected /= 2;
11631160
}
11641161

1165-
auto threadCountPerGrouping = tgDispatchSizeSelected * numberOfThreadsInThreadGroup;
1162+
auto threadCountPerGrouping = tgDispatchSizeSelected * threadsPerThreadGroup;
11661163
// make sure we do not use more threads than present on each xe core
11671164
while (threadCountPerGrouping > threadsPerXeCore && tgDispatchSizeSelected > 1) {
11681165
tgDispatchSizeSelected /= 2;
@@ -1187,26 +1184,25 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadGroupDispatch(InterfaceDescrip
11871184
uint32_t availableThreadCount = gfxCoreHelper.calculateAvailableThreadCount(hwInfo, grfCount);
11881185
availableThreadCount *= tileCount;
11891186

1190-
uint32_t numberOfThreadsInThreadGroup = interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup();
1191-
uint32_t dispatchedTotalThreadCount = numberOfThreadsInThreadGroup * threadGroupCount;
1192-
UNRECOVERABLE_IF(numberOfThreadsInThreadGroup == 0u);
1187+
uint32_t dispatchedTotalThreadCount = threadsPerThreadGroup * threadGroupCount;
1188+
UNRECOVERABLE_IF(threadsPerThreadGroup == 0u);
11931189
auto tgDispatchSizeSelected = 1u;
11941190

11951191
if (dispatchedTotalThreadCount <= availableThreadCount) {
11961192
tgDispatchSizeSelected = 1;
1197-
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
1193+
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize8) {
11981194
tgDispatchSizeSelected = 8;
1199-
} else if (numberOfThreadsInThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
1195+
} else if (threadsPerThreadGroup <= maxThreadsInTGForTGDispatchSize4) {
12001196
tgDispatchSizeSelected = 4;
12011197
} else {
12021198
tgDispatchSizeSelected = 2;
12031199
}
1204-
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
1205-
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
1200+
if (threadGroupDimensions[0] > 1 && (threadGroupDimensions[1] > 1 || threadGroupDimensions[2] > 1)) {
1201+
while (threadGroupDimensions[0] % tgDispatchSizeSelected != 0) {
12061202
tgDispatchSizeSelected /= 2;
12071203
}
1208-
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
1209-
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
1204+
} else if (threadGroupDimensions[1] > 1 && threadGroupDimensions[2] > 1) {
1205+
while (threadGroupDimensions[1] % tgDispatchSizeSelected != 0) {
12101206
tgDispatchSizeSelected /= 2;
12111207
}
12121208
}

shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
3333
const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd) {
3434
const auto &productHelper = device.getProductHelper();
3535
if (productHelper.isDisableOverdispatchAvailable(hwInfo)) {
36-
if (interfaceDescriptor.getNumberOfThreadsInGpgpuThreadGroup() == 1) {
36+
if (threadsPerThreadGroup == 1) {
3737
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(2u));
3838
} else {
3939
interfaceDescriptor.setThreadGroupDispatchSize(static_cast<INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE>(3u));

0 commit comments

Comments
 (0)