
Commit e0b184c

[mlir][DispatchCreation] Avoid SSA violation due to consumer fusion while forming dispatches (iree-org#21186)
Consumer fusion would create illegal dispatches in the presence of diamond fusion patterns when one of the operations isn't fused into the dispatch. For example:

```
%producer = ...
%0 = "non_fused_op"(%producer)
%1 = "fused_op"(%producer, %0)
```

Moving `"fused_op"` into the same dispatch as `%producer` is an SSA violation: `"non_fused_op"` stays outside the dispatch yet both consumes a value produced inside it and feeds `"fused_op"` inside it, so a result of the dispatch would have to be available before the dispatch finishes executing. Avoid this fusion.

Fixes iree-org#21176

---------

Signed-off-by: MaheshRavishankar <[email protected]>
Signed-off-by: Ian Wood <[email protected]>
Co-authored-by: Ian Wood <[email protected]>
1 parent 1f1167d commit e0b184c

File tree

4 files changed: +83 -5 lines changed

compiler/src/iree/compiler/DispatchCreation/FoldUnitExtentDims.cpp

Lines changed: 8 additions & 4 deletions
```diff
@@ -259,8 +259,10 @@ void FoldUnitExtentDimsPass::runOnOperation() {
 
   RewritePatternSet foldUnitDimsPatterns(context);
   populatefoldUnitDimsPatterns(foldUnitDimsPatterns);
-  if (failed(
-          applyPatternsGreedily(moduleOp, std::move(foldUnitDimsPatterns)))) {
+  GreedyRewriteConfig rewriterConfig;
+  rewriterConfig.setMaxIterations(GreedyRewriteConfig::kNoLimit);
+  if (failed(applyPatternsGreedily(moduleOp, std::move(foldUnitDimsPatterns),
+                                   rewriterConfig))) {
     return signalPassFailure();
   }
 }
@@ -269,8 +271,10 @@ void FoldUnitExtentDimsForFuncPass::runOnOperation() {
   MLIRContext *context = &getContext();
   RewritePatternSet foldUnitDimsPatterns(context);
   populatefoldUnitDimsPatterns(foldUnitDimsPatterns);
-  if (failed(applyPatternsGreedily(getOperation(),
-                                   std::move(foldUnitDimsPatterns)))) {
+  GreedyRewriteConfig rewriterConfig;
+  rewriterConfig.setMaxIterations(GreedyRewriteConfig::kNoLimit);
+  if (failed(applyPatternsGreedily(
+          getOperation(), std::move(foldUnitDimsPatterns), rewriterConfig))) {
     return signalPassFailure();
   }
 }
```
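For context on this hunk, a minimal sketch of the driver setup it switches to, assuming only the MLIR greedy pattern rewrite driver API already visible in the diff (`applyUntilFixedPoint` is a hypothetical helper, not part of this commit). `GreedyRewriteConfig::kNoLimit` removes the default cap on rewrite iterations, so the driver keeps applying patterns until a true fixed point instead of reporting failure when it hits the iteration limit.

```cpp
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Hypothetical helper mirroring the setup in the hunk above: run the greedy
// driver with no iteration cap so it only stops once no pattern applies.
static LogicalResult applyUntilFixedPoint(Operation *op,
                                          RewritePatternSet &&patterns) {
  GreedyRewriteConfig config;
  config.setMaxIterations(GreedyRewriteConfig::kNoLimit);
  return applyPatternsGreedily(op, std::move(patterns), config);
}
```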

compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -994,7 +994,7 @@ createFusionGroups(TensorDimTrackingRewriter &rewriter,
   }
 
   if (failed(moveOperandDefs(rewriter, consumer, regionOp, dominanceInfo,
-                             regionOp.getOperation()))) {
+                             {}))) {
     continue;
   }
 
```
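Reading of this one-line change: the final argument to `moveOperandDefs` appears to be the set of operations to ignore while walking backward slices. Previously the dispatch region op itself was ignored, so a consumer operand that transitively depended on the region's results never showed up in the slice; with `{}` the slice can now reach `regionOp`, and the new containment check added in `FusionUtils.cpp` below fails, so this fusion candidate is skipped via the `continue`.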
compiler/src/iree/compiler/DispatchCreation/FusionUtils.cpp

Lines changed: 12 additions & 0 deletions
```diff
@@ -120,6 +120,10 @@ LogicalResult moveOperandDefs(RewriterBase &rewriter,
   llvm::SetVector<Operation *> slice;
   for (auto op : operations) {
     for (auto operand : op->getOperands()) {
+      // If operand is the insertion point, there is nothing to move.
+      if (operand.getDefiningOp() == insertionPoint) {
+        continue;
+      }
       [[maybe_unused]] LogicalResult result =
           getBackwardSlice(operand, &slice, options);
       assert(result.succeeded());
@@ -131,12 +135,20 @@ LogicalResult moveOperandDefs(RewriterBase &rewriter,
     llvm::SetVector<Value> capturedVals;
     mlir::getUsedValuesDefinedAbove(regions, capturedVals);
     for (auto value : capturedVals) {
+      // If operand is the insertion point, there is nothing to move.
+      if (value.getDefiningOp() == insertionPoint) {
+        continue;
+      }
       [[maybe_unused]] LogicalResult result =
           getBackwardSlice(value, &slice, options);
       assert(result.succeeded());
     }
   }
 
+  if (slice.contains(insertionPoint)) {
+    return failure();
+  }
+
   mlir::topologicalSort(slice);
   for (auto op : slice) {
     rewriter.moveOpBefore(op, insertionPoint);
```
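To see the new guard in isolation, here is a hedged sketch of `moveOperandDefs` reduced to its essentials; `computeBackwardSlice` is a hypothetical stand-in for the `getBackwardSlice` calls above, and the region/captured-value handling is elided. The key point is the containment check: if the backward slice of the operands reaches the insertion point, some operand definition transitively consumes a result of the op being fused into, and moving it above that op would use a value before it is defined, which is exactly the diamond from the commit message.

```cpp
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"

using namespace mlir;

// Hypothetical stand-in for the getBackwardSlice() calls in the hunk above.
void computeBackwardSlice(Value value, llvm::SetVector<Operation *> &slice);

// Sketch only; simplified from the hunk above, not the actual implementation.
LogicalResult moveOperandDefsSketch(RewriterBase &rewriter,
                                    ArrayRef<Operation *> operations,
                                    Operation *insertionPoint) {
  llvm::SetVector<Operation *> slice;
  for (Operation *op : operations) {
    for (Value operand : op->getOperands()) {
      // Values produced by the insertion point itself do not need to move.
      if (operand.getDefiningOp() == insertionPoint)
        continue;
      computeBackwardSlice(operand, slice);
    }
  }
  // New guard: the slice reaching the insertion point means some operand def
  // depends on the op we are fusing into, so moving it would break SSA.
  if (slice.contains(insertionPoint))
    return failure();
  // Otherwise the moves are safe once performed in topological order.
  for (Operation *op : mlir::topologicalSort(slice))
    rewriter.moveOpBefore(op, insertionPoint);
  return success();
}
```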

compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir

Lines changed: 62 additions & 0 deletions
```diff
@@ -1331,3 +1331,65 @@ util.func @attention_rope_fusion(%arg0: tensor<10x20x30x50xbf16>,
 // CHECK-SAME:     ins(%[[Q]], %[[K]], %[[V]]
 // CHECK:        flow.return %[[ATTENTION]]
 // CHECK:      util.return %[[DISPATCH]]
+
+// -----
+
+// Avoid fusing a consumer when the producer/consumer has the following structure:
+//
+// ```mlir
+// %producer = "producer_op"
+// %root = "root_op"(%producer)
+// %0 = "non_fusable_op"(%producer)
+// %1 = "consumer_op"(%producer, %root, %0)
+// ```
+//
+// Moving `"producer_op"`, `"root_op"`, and `"consumer_op"` into a dispatch
+// and leaving `"non_fusable_op"` out would lead to an SSA violation.
+util.func public @avoid_illegal_consumer_fusion(%arg0: tensor<75600x5120xf32>) -> tensor<75600x1x5120xbf16> {
+  %cst0 = arith.constant 0.0 : bf16
+  %0 = tensor.empty() : tensor<75600x5120xbf16>
+  %1 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg0 : tensor<75600x5120xf32>) outs(%0 : tensor<75600x5120xbf16>) {
+  ^bb0(%in: f32, %out: bf16):
+    %13 = arith.truncf %in : f32 to bf16
+    linalg.yield %13 : bf16
+  } -> tensor<75600x5120xbf16>
+  %2 = tensor.empty() : tensor<75600xbf16>
+  %3 = linalg.fill ins(%cst0 : bf16) outs(%2 : tensor<75600xbf16>) -> tensor<75600xbf16>
+  %4 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
+      iterator_types = ["parallel", "reduction"]}
+      ins(%1 : tensor<75600x5120xbf16>) outs(%3 : tensor<75600xbf16>) {
+  ^bb0(%in: bf16, %out: bf16):
+    %8 = arith.addf %in, %out : bf16
+    linalg.yield %8 : bf16
+  } -> tensor<75600xbf16>
+  %expanded = tensor.expand_shape %1 [[0], [1, 2]] output_shape [75600, 1, 5120]
+      : tensor<75600x5120xbf16> into tensor<75600x1x5120xbf16>
+  %5 = tensor.empty() : tensor<75600x1x5120xbf16>
+  %6 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+                       affine_map<(d0, d1, d2) -> (d0)>,
+                       affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+      iterator_types = ["parallel", "parallel", "parallel"]}
+      ins(%expanded, %4 : tensor<75600x1x5120xbf16>, tensor<75600xbf16>)
+      outs(%5 : tensor<75600x1x5120xbf16>) {
+  ^bb0(%in: bf16, %in_0: bf16, %out: bf16):
+    %9 = arith.subf %in, %in_0 : bf16
+    linalg.yield %9 : bf16
+  } -> tensor<75600x1x5120xbf16>
+  util.return %6 : tensor<75600x1x5120xbf16>
+}
+// CHECK-LABEL: @avoid_illegal_consumer_fusion(
+//       CHECK:   %[[DISPATCH:.+]]:2 = flow.dispatch.region
+//       CHECK:     %[[GENERIC0:.+]] = linalg.generic
+//       CHECK:     %[[GENERIC1:.+]] = linalg.generic
+//  CHECK-SAME:         ins(%[[GENERIC0]] :
+//       CHECK:     flow.return %[[GENERIC1]], %[[GENERIC0]]
+//       CHECK:   %[[EXPAND_SHAPE:.+]] = tensor.expand_shape %[[DISPATCH]]#1
+//       CHECK:   %[[GENERIC2:.+]] = linalg.generic
+//  CHECK-SAME:       ins(%[[EXPAND_SHAPE]], %[[DISPATCH]]#0 :
+//       CHECK:   util.return %[[GENERIC2]]
```
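Taken together, the CHECK lines pin down the intended outcome: the truncation and reduction generics are fused into a single `flow.dispatch.region` that now returns both of their results, while the `tensor.expand_shape` and the final elementwise generic (the would-be consumer) stay outside the dispatch and read the region's results, so no use of a value precedes its definition.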
