Skip to content

Commit c2e13e8

Browse files
authored
[Codegen][GPU] Also don't tile large fills (#19937)
This skips tiling large fills, for the same reasons as in #19887.
1 parent b5b943a commit c2e13e8

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

compiler/src/iree/compiler/Codegen/Common/TileLargeTensors.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -179,11 +179,12 @@ static void processRegion(RewriterBase &rewriter, Region *region,
179179
// Try to greedily tile + fuse linalg ops.
180180
if (auto linalgOp = dyn_cast<linalg::LinalgOp>(op)) {
181181

182-
// Skip copies and transposes. This is based on an expectation that such
183-
// ops are introduced carefully and don't represent significant
184-
// computation anyway. Equivalent generics are still tiled as they
185-
// typically arise organically.
186-
if (isa<linalg::TransposeOp, linalg::CopyOp>(op)) {
182+
// Skip copies, transposes, and fills. This is based on an expectation
183+
// that such ops are introduced carefully and don't represent
184+
// significant computation anyway. Equivalent generics are still tiled
185+
// as they typically arise organically. Fills in particular are almost
186+
// never found on their own and will be fused when tiling if need be.
187+
if (isa<linalg::TransposeOp, linalg::CopyOp, linalg::FillOp>(op)) {
187188
continue;
188189
}
189190
tileToMaxVectorSize(rewriter, linalgOp, maxVectorSize);

compiler/src/iree/compiler/Codegen/Common/test/tile_large_tensors.mlir

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,3 +113,18 @@ func.func @no_tile_copy(%arg0: tensor<64x256xf32>) -> tensor<64x256xf32> {
113113
// CHECK-NOT: scf.for
114114
// CHECK: %[[COPY:.+]] = linalg.copy
115115
// CHECK: return %[[COPY]]
116+
117+
// -----
118+
119+
func.func @no_tile_fill(%arg0: f32) -> tensor<64x256xf32> {
120+
%empty = tensor.empty() : tensor<64x256xf32>
121+
%0 = linalg.fill
122+
ins(%arg0 : f32)
123+
outs(%empty : tensor<64x256xf32>) -> tensor<64x256xf32>
124+
return %0 : tensor<64x256xf32>
125+
}
126+
127+
// CHECK-LABEL: func.func @no_tile_fill
128+
// CHECK-NOT: scf.for
129+
// CHECK: %[[FILL:.+]] = linalg.fill
130+
// CHECK: return %[[FILL]]

0 commit comments

Comments
 (0)