llvm · IanWood1 · Mar 28, 2025 · Mar 28, 2025
@@ -1897,6 +1897,9 @@ struct FoldReshapeWithGenericOpByCollapsing
                                          "fusion blocked by control function");
     }
 
+    // Set the insertion point after `producer` because there could be uses
+    // of `producer` between it and the `tensor.collapse_shape` op.
+    rewriter.setInsertionPointAfter(producer);
     std::optional<CollapseResult> collapseResult =
         collapseOpIterationDims(producer, collapsableIterationDims, rewriter);
     if (!collapseResult) {

diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
@@ -798,3 +798,35 @@ func.func @fuse_by_collapsing_change_reshape_order_bubblecollapse(%arg0 : tensor
 // CONTROL-SAME:       ins(%[[ARG0]],
 //      CONTROL:   %[[COLLAPSE:.+]] = tensor.collapse_shape %[[GENERIC]]
 //      CONTROL:   return %[[COLLAPSE]]
+
+// -----
+
+// Check that new ops are inserted at `%0` because `%0` is also used by `tensor.dim`.
+#map0 = affine_map<(d0, d1) -> (d0, d1)>
+func.func @fuse_by_collapsing_correct_insertion(%arg0 : tensor<?x?xf32>,
+    %sz0: index, %sz1: index) -> (tensor<?xf32>, index) {
+  %c0 = arith.constant 0 : index
+  %init = tensor.empty(%sz1, %sz0) : tensor<?x?xf32>
+  %0 = linalg.generic {
+      indexing_maps = [#map0, #map0],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg0 : tensor<?x?xf32>)
+      outs(%init : tensor<?x?xf32>) {
+        ^bb0(%b0 : f32, %b1 : f32):
+          %out = arith.negf %b0 : f32
+          linalg.yield %out : f32
+      } -> tensor<?x?xf32>
+  %dim = tensor.dim %0, %c0 : tensor<?x?xf32>
+  %1 = tensor.collapse_shape %0 [[0, 1]] : tensor<?x?xf32> into tensor<?xf32>
+  return %1, %dim : tensor<?xf32>, index
+}
+
+// CHECK-LABEL: func @fuse_by_collapsing_correct_insertion
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK:     %[[COLLAPSE:.+]] = tensor.collapse_shape %[[ARG0]]
+// CHECK:     %[[OUT:.+]] = linalg.generic
+// CHECK-SAME:   ins(%[[COLLAPSE]] : tensor<?xf32>)
+// CHECK:     %[[EXPANDED:.+]] = tensor.expand_shape %[[OUT]]
+// CHECK:     %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]]
+// CHECK:      return %[[OUT]], %[[DIM]]