Skip to content

Commit c2f864d

Browse files
authored
[HOTFIX][BACKPORT] Manually add missing perf config for MI200 to avoid perf regression (#1818)
* hot fix: add missing perfConfig manually to recover performance on MI200 * formatting * fix test
1 parent 1655ca0 commit c2f864d

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ const InitParamsAccel PopulateParamsXDL::initParametersForward8BitConv[PopulateP
233233
{128,64,16,128,16,8,1,true,true},
234234
{64,32,4,64,16,16,1,true,true},
235235
{32,16,8,16,16,16,1,true,true},
236+
{64,256,4,32,16,4,1,true,true},
236237
{64,256,4,32,32,8,1,true,true},
237238
{256,32,4,64,16,16,1,true,true},
238239
{32,32,32,32,16,16,1,true,true},
@@ -270,7 +271,7 @@ static const InitParamsAccel initParametersI8Gemm[nInitParametersI8Gemm];
270271
// END_GEMM_XDL_i8_DECS
271272

272273
// BEGIN_CONV_XDL_i8_DECS
273-
static constexpr size_t nInitParametersForward8BitConv = 24;
274+
static constexpr size_t nInitParametersForward8BitConv = 25;
274275
static const InitParamsAccel initParametersForward8BitConv[nInitParametersForward8BitConv];
275276
// END_CONV_XDL_i8_DECS
276277

mlir/test/Dialect/Rock/affix_tuning_params.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x
4949
func.func @rock_conv_i8(%filter : memref<1x128x8x3x3xi8>, %input : memref<128x1x8x32x32xi8>, %output : memref<128x1x128x30x30xi32>) {
5050
// CHECK: rock.conv
5151
// CHECK-SAME: derivedBlockSize = 256
52-
// CHECK-SAME: params = #rock.xdlops_gemm_derived_params<kpackPerBlock = 4, mPerBlock = 64, nPerBlock = 256, kpack = 8, mPerWave = 32, nPerWave = 128, mnPerXdl = 32, splitKFactor = 1, forceUnroll = true>
52+
// CHECK-SAME: params = #rock.xdlops_gemm_derived_params<kpackPerBlock = 4, mPerBlock = 64, nPerBlock = 256, kpack = 4, mPerWave = 32, nPerWave = 128, mnPerXdl = 16, splitKFactor = 1, forceUnroll = true>
5353
// GRID: rock.gridwise_gemm
5454
// GRID-SAME: gridSize = 900
5555
rock.conv(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 {

0 commit comments

Comments
 (0)