Skip to content

Commit ec3e28e

Browse files
authored
[Codegen][IGEMM] Set convolution pre-padding as default (iree-org#21899)
As for now, all the previous large regressions caused by convolution pre-padding has been fixed, we'll set pre-padding as default for IGEMM path. In addition, this PR also removes a over-strict condition in `swapCollapseShapeWithSlice` pattern. --------- Signed-off-by: yzhang93 <[email protected]>
1 parent 3b62d23 commit ec3e28e

File tree

3 files changed

+11
-7
lines changed

3 files changed

+11
-7
lines changed

compiler/src/iree/compiler/Codegen/Common/Transforms.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,11 +484,13 @@ swapCollapseShapeWithSlice(RewriterBase &rewriter,
484484
llvm::zip_equal(collapsedSizes, collapsedOffsets,
485485
collapseShapeOp.getReassociationIndices())) {
486486
// CASE #1 - size and/or offset are dynamic.
487+
// TODO(vivian): For some special case, running this pattern with dynamic
488+
// `collapsedSize` may cause a dynamic allocation in workgroup which blocks
489+
// `GPUReduceBankConflictsPass`.
487490
if (isa<Value>(collapsedSize) || isa<Value>(collapsedOffset)) {
488491
// Special case especially for collapse shape of convolution filter in
489492
// IGEMM, while the offset is dynamic and the size is static.
490-
if (isa<Attribute>(collapsedSize) && isa<Value>(collapsedOffset) &&
491-
reassocIndices.size() != 1) {
493+
if (isa<Attribute>(collapsedSize) && isa<Value>(collapsedOffset)) {
492494
auto maybeStaticSize = getConstantIntValue(collapsedSize);
493495
if (!maybeStaticSize) {
494496
return rewriter.notifyMatchFailure(sliceOp,

compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ static llvm::cl::opt<bool>
127127
llvm::cl::opt<bool> clGPUPadConvolution(
128128
"iree-codegen-llvmgpu-igemm-pad-convolution",
129129
llvm::cl::desc("enable pre-padding for convolutions in igemm path"),
130-
llvm::cl::init(false));
130+
llvm::cl::init(true));
131131

132132
static llvm::cl::opt<bool>
133133
clUseDirectLoad("iree-llvmgpu-use-direct-load",

compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_igemm_tile_and_fuse.mlir

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ hal.executable private @main {
143143
// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
144144
// CHECK-DAG: %[[ASSUMED_B2:.+]] = memref.assume_alignment %[[B2]], 64
145145
// CHECK-DAG: %[[BUF2:.+]] = amdgpu.fat_raw_buffer_cast %[[ASSUMED_B2]]
146-
// CHECK-DAG: memref.alloc() : memref<2x1x32x18xf32, #gpu.address_space<workgroup>>
147-
// CHECK-DAG: memref.alloc() : memref<16x20xf16, #gpu.address_space<workgroup>>
148-
// CHECK-DAG: memref.alloc() : memref<2x1x32x20xf16, #gpu.address_space<workgroup>>
146+
// CHECK-DAG: memref.alloc() : memref<2x1x32x16xf32, #gpu.address_space<workgroup>>
147+
// CHECK-DAG: memref.alloc() : memref<16x16xf16, #gpu.address_space<workgroup>>
148+
// CHECK-DAG: memref.alloc() : memref<2x1x32x16xf16, #gpu.address_space<workgroup>>
149149
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
150150
// CHECK-DAG: %[[C721:.+]] = arith.constant 721 : index
151151
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -155,12 +155,14 @@ hal.executable private @main {
155155
// CHECK-DAG: %[[LHS_MM0:.+]] = vector.transfer_read {{.*}} vector<4xf16>
156156
// CHECK-DAG: %[[RHS_MM:.+]] = vector.transfer_read {{.*}} vector<4xf16>
157157
// CHECK-COUNT-1: amdgpu.mfma {{.*}}blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32
158-
// CHECK: %[[LOOP_T:.+]] = vector.shape_cast %[[LOOP]] : vector<1x1x1x1x4x1xf32> to vector<4xf32>
158+
// CHECK: %[[LOOP_T:.+]] = vector.shape_cast %[[LOOP]] : vector<1x1x1x1x4x1xf32> to vector<4x1x1xf32>
159159
// CHECK: vector.transfer_write %[[LOOP_T]]
160160
// Note there is a writeback loop here that is skipped to simplify the test.
161161
// CHECK: memref.copy {{.*}}#gpu.address_space<workgroup>> to {{.*}}#amdgpu.address_space<fat_raw_buffer>
162162
// CHECK: } {mapping = [#iree_codegen.workgroup_mapping<z>, #iree_codegen.workgroup_mapping<y>, #iree_codegen.workgroup_mapping<x>]}
163163

164+
// TODO(vivian): This test doesn't have reduce bank conflicts taken effect because of collapsing a dynamic allocation. Try to fix it.
165+
164166
// -----
165167

166168
#pipeline_layout = #hal.pipeline.layout<bindings = [

0 commit comments

Comments
 (0)