
Commit aef6e1f

[GPU] Bail out in GPUReduceBankConflicts if we have a collapse_shape user (#18863)

Padding an allocation that has a memref.collapse_shape user is unsupported
by upstream and can lead to a compiler error: llvm/llvm-project#112994

Progress towards: #18858

Signed-off-by: Nirvedh <[email protected]>
Parent commit: 8ce8bed
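Why padding breaks here: the pass pads the inner dimension of the alloc and then recovers the original shape through a memref.subview, which leaves a strided, non-contiguous layout. A rough sketch of the IR the pass would otherwise produce for the new test case, assuming (for illustration only) the inner dimension were padded from 8 to 12 elements; the actual padded size comes from the pass's heuristics:

  // Hypothetical post-padding IR (illustrative only; not produced after this commit):
  %padded = memref.alloc() : memref<4x2x16x8x12xf32, #gpu.address_space<workgroup>>
  // The original shape is recovered via a subview whose row-major strides
  // come from the padded buffer:
  %view = memref.subview %padded[0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1]
      : memref<4x2x16x8x12xf32, #gpu.address_space<workgroup>> to
        memref<4x2x16x8x8xf32, strided<[3072, 1536, 96, 12, 1]>, #gpu.address_space<workgroup>>
  // Collapsing the [3, 4] group would fold a dimension of stride 12 with a
  // dimension of size 8 and stride 1; since 8 * 1 != 12 the group is not
  // contiguous, so memref.collapse_shape cannot express the result and
  // compilation fails (llvm/llvm-project#112994):
  // %1 = memref.collapse_shape %view [[0], [1, 2], [3, 4]] : ...  // rejected upstream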

2 files changed (+83, −0)

compiler/src/iree/compiler/Codegen/Common/GPU/GPUReduceBankConflicts.cpp

Lines changed: 23 additions & 0 deletions
@@ -18,6 +18,23 @@ namespace mlir::iree_compiler {
 
 namespace {
 
+/// Check if the AllocOp has a CollapseShapeOp user (looking through view-like ops).
+static bool hasCollapseShapeUser(memref::AllocOp allocOp) {
+  SmallVector<Operation *> users(allocOp->getUsers());
+  while (!users.empty()) {
+    auto user = users.pop_back_val();
+    if (isa<memref::CollapseShapeOp>(user)) {
+      return true;
+    }
+    if (isa<ViewLikeOpInterface>(user)) {
+      for (auto u : user->getUsers()) {
+        users.push_back(u);
+      }
+    }
+  }
+  return false;
+}
+
 /// Pad out the inner dimension of the `memref.alloc` op in order to reduce
 /// the chances of bank conflicts when reading 2D shapes within shared memory.
 static void padAlloc(MLIRContext *context, memref::AllocOp allocOp,
@@ -28,6 +45,12 @@ static void padAlloc(MLIRContext *context, memref::AllocOp allocOp,
   int64_t innerDim = allocOpShape.back();
   if (ShapedType::isDynamic(innerDim))
     return;
+
+  // Return if we have a CollapseShapeOp user, as padding is unsupported in
+  // that case.
+  if (hasCollapseShapeUser(allocOp))
+    return;
+
   Type elType = allocOp.getType().getElementType();
   unsigned bitwidth =
       mlir::DataLayout::closest(allocOp).getTypeSizeInBits(elType);

compiler/src/iree/compiler/Codegen/Common/GPU/test/reduce_bank_conflicts.mlir

Lines changed: 60 additions & 0 deletions
@@ -47,6 +47,66 @@ func.func @pad_alloc_expand_shape(%a: memref<1024x1024xf32>) {
   return
 }
 
+// -----
+// CHECK-LABEL: func.func @no_pad_alloc_collapse_shape
+// CHECK: %[[A:.*]] = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C:.*]] = memref.collapse_shape %[[A]] {{\[}}[0], [1, 2], [3, 4]]
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into
+// CHECK-SAME: memref<4x32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VEC_READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST_0]] {in_bounds = [true]} :
+// CHECK-SAME: memref<1024x1024xf32>, vector<4xf32>
+// CHECK: vector.transfer_write %[[VEC_READ]], %[[C]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} :
+// CHECK-SAME: vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+
+
+func.func @no_pad_alloc_collapse_shape(%a: memref<1024x1024xf32>) {
+  %0 = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+  %1 = memref.collapse_shape %0 [[0], [1, 2], [3, 4]]
+      : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  %c0 = arith.constant 0 : index
+  %cst_0 = arith.constant 0.000000e+00 : f32
+  %3 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} :
+      memref<1024x1024xf32>, vector<4xf32>
+  vector.transfer_write %3, %1[%c0, %c0, %c0] {in_bounds = [true]} :
+      vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @no_pad_alloc_collapse_shape_throughsubview
+// CHECK: %[[A:.*]] = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[S:.*]] = memref.subview %[[A]][0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1] :
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> to
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C:.*]] = memref.collapse_shape %[[S]] {{\[}}[0], [1, 2], [3, 4]]
+// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into
+// CHECK-SAME: memref<4x32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VEC_READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true]} :
+// CHECK-SAME: memref<1024x1024xf32>, vector<4xf32>
+// CHECK: vector.transfer_write %[[VEC_READ]], %[[C]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} :
+// CHECK-SAME: vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+
+
+func.func @no_pad_alloc_collapse_shape_throughsubview(%a: memref<1024x1024xf32>) {
+  %0 = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+  %subview = memref.subview %0[0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1]
+      : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> to memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
+  %1 = memref.collapse_shape %subview [[0], [1, 2], [3, 4]]
+      : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  %c0 = arith.constant 0 : index
+  %cst_0 = arith.constant 0.000000e+00 : f32
+  %3 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} :
+      memref<1024x1024xf32>, vector<4xf32>
+  vector.transfer_write %3, %1[%c0, %c0, %c0] {in_bounds = [true]} :
+      vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
+  return
+}
+
 // -----
 
 // CHECK-LABEL: func.func @pad_alloc_negative
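Both new cases use the file's // ----- split-input-file separators, so they are picked up by the file's existing RUN line, which is not shown in this hunk. As a rough sketch of how such a FileCheck test is typically driven, assuming the pass is exposed under the flag --iree-codegen-gpu-reduce-bank-conflicts (an assumption; the authoritative flag is whatever the file's RUN line uses):

  // RUN: iree-opt --split-input-file --iree-codegen-gpu-reduce-bank-conflicts %s | FileCheck %s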
