[Codegen][GPU] Also don't tile large fills (#19937)

qedawkins · web-flow · commit c2e13e84a729 · 2025-02-07T23:01:05.000Z
This skips tiling large fills, for the same reasons as given in #19887.
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileLargeTensors.cpp b/compiler/src/iree/compiler/Codegen/Common/TileLargeTensors.cpp
@@ -179,11 +179,12 @@ static void processRegion(RewriterBase &rewriter, Region *region,
       // Try to greedily tile + fuse linalg ops.
       if (auto linalgOp = dyn_cast<linalg::LinalgOp>(op)) {
 
-        // Skip copies and transposes. This is based on an expectation that such
-        // ops are introduced carefully and don't represent significant
-        // computation anyway. Equivalent generics are still tiled as they
-        // typically arise organically.
-        if (isa<linalg::TransposeOp, linalg::CopyOp>(op)) {
+        // Skip copies, transposes, and fills. This is based on an expectation
+        // that such ops are introduced carefully and don't represent
+        // significant computation anyway. Equivalent generics are still tiled
+        // as they typically arise organically. Fills in particular are almost
+        // never found on their own and will be fused when tiling if need be.
+        if (isa<linalg::TransposeOp, linalg::CopyOp, linalg::FillOp>(op)) {
           continue;
         }
         tileToMaxVectorSize(rewriter, linalgOp, maxVectorSize);
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_large_tensors.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_large_tensors.mlir
@@ -113,3 +113,18 @@ func.func @no_tile_copy(%arg0: tensor<64x256xf32>) -> tensor<64x256xf32> {
 //   CHECK-NOT:   scf.for
 //       CHECK:   %[[COPY:.+]] = linalg.copy
 //       CHECK:   return %[[COPY]]
+
+// -----
+
+func.func @no_tile_fill(%arg0: f32) -> tensor<64x256xf32> {  // Lone 64x256 fill; must stay untiled.
+  %empty = tensor.empty() : tensor<64x256xf32>
+  %0 = linalg.fill  // Fill %empty with the scalar %arg0.
+    ins(%arg0 : f32)
+    outs(%empty : tensor<64x256xf32>) -> tensor<64x256xf32>
+  return %0 : tensor<64x256xf32>  // The fill result is returned directly.
+}
+
+// CHECK-LABEL: func.func @no_tile_fill
+//   CHECK-NOT:   scf.for
+//       CHECK:   %[[FILL:.+]] = linalg.fill
+//       CHECK:   return %[[FILL]]