llvm · linuxlonelyeagle · Oct 25, 2025 · Oct 25, 2025
diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
@@ -330,6 +330,7 @@ def AffineForOp : Affine_Op<"for",
     Speculation::Speculatability getSpeculatability();
   }];
 
+  let hasCanonicalizer = 1;
   let hasCustomAssemblyFormat = 1;
   let hasFolder = 1;
   let hasRegionVerifier = 1;

diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -58,8 +58,5 @@ std::unique_ptr<OperationPass<func::FuncOp>> createRaiseMemrefToAffine();
-/// Apply normalization transformations to affine loop-like ops. If
-/// `promoteSingleIter` is true, single iteration loops are promoted (i.e., the
-/// loop is replaced by its loop body).
+/// Apply normalization transformations to affine loop-like ops.
-std::unique_ptr<OperationPass<func::FuncOp>>
-createAffineLoopNormalizePass(bool promoteSingleIter = false);
+std::unique_ptr<OperationPass<func::FuncOp>> createAffineLoopNormalizePass();
 
 /// Performs packing (or explicit copying) of accessed memref regions into
 /// buffers in the specified faster memory space through either pointwise copies

diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -383,10 +383,6 @@ def AffineParallelize : Pass<"affine-parallelize", "func::FuncOp"> {
 def AffineLoopNormalize : Pass<"affine-loop-normalize", "func::FuncOp"> {
   let summary = "Apply normalization transformations to affine loop-like ops";
   let constructor = "mlir::affine::createAffineLoopNormalizePass()";
-  let options = [
-    Option<"promoteSingleIter", "promote-single-iter", "bool",
-           /*default=*/"true", "Promote single iteration loops">,
-  ];
 }
 
 def LoopCoalescing : Pass<"affine-loop-coalescing", "func::FuncOp"> {

diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -171,11 +171,8 @@ void normalizeAffineParallel(AffineParallelOp op);
 /// lower bound to zero and loop step to one. The upper bound is set to the trip
 /// count of the loop. Original loops must have a lower bound with only a single
 /// result. There is no such restriction on upper bounds. Returns success if the
-/// loop has been normalized (or is already in the normal form). If
-/// `promoteSingleIter` is true, the loop is simply promoted if it has a single
-/// iteration.
-LogicalResult normalizeAffineFor(AffineForOp op,
-                                 bool promoteSingleIter = false);
+/// loop has been normalized (or is already in the normal form).
+LogicalResult normalizeAffineFor(AffineForOp op);
 
 /// Traverse `e` and return an AffineExpr where all occurrences of `dim` have
 /// been replaced by either:

diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -2716,6 +2716,46 @@ LogicalResult AffineForOp::fold(FoldAdaptor adaptor,
   return success(folded);
 }
 
+/// Inlines the single block of `region` right before `op`, passing `blockArgs`
+/// as the block argument values, and replaces the results of `op` with the
+/// operands of the block terminator, which is erased afterwards.
+static void replaceOpWithRegion(PatternRewriter &rewriter, Operation *op,
+                                Region &region, ValueRange blockArgs = {}) {
+  assert(region.hasOneBlock() && "expected single-block region");
+  Block &body = region.front();
+  Operation *yieldOp = body.getTerminator();
+  rewriter.inlineBlockBefore(&body, op, blockArgs);
+  rewriter.replaceOp(op, yieldOp->getOperands());
+  rewriter.eraseOp(yieldOp);
+}
+
+namespace {
+/// Inlines the body of an `affine.for` in place of the loop when
+/// `getTrivialConstantTripCount` reports a trip count of exactly one.
+struct SimplifyTrivialLoops : public OpRewritePattern<AffineForOp> {
+  using OpRewritePattern<AffineForOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(AffineForOp forOp,
+                                PatternRewriter &rewriter) const override {
+    if (getTrivialConstantTripCount(forOp) != 1)
+      return failure();
+    // The lower bound replaces the induction variable when inlining.
+    rewriter.setInsertionPoint(forOp);
+    Value lower = AffineApplyOp::create(
+        rewriter, forOp.getLoc(), forOp.getLowerBoundMap(),
+        ValueRange(forOp.getLowerBoundOperands()));
+    SmallVector<Value> blockArgs = {lower};
+    llvm::append_range(blockArgs, forOp.getInits());
+    replaceOpWithRegion(rewriter, forOp, forOp.getRegion(), blockArgs);
+    return success();
+  }
+};
+} // namespace
+
+void AffineForOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *context) {
+  results.add<SimplifyTrivialLoops>(context); // Promotes single-trip loops.
+}
+
 OperandRange AffineForOp::getEntrySuccessorOperands(RegionBranchPoint point) {
   assert((point.isParent() || point == getRegion()) && "invalid region point");
 

diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp
@@ -33,23 +33,21 @@ namespace {
 /// that are already in a normalized form.
 struct AffineLoopNormalizePass
     : public affine::impl::AffineLoopNormalizeBase<AffineLoopNormalizePass> {
-  explicit AffineLoopNormalizePass(bool promoteSingleIter) {
-    this->promoteSingleIter = promoteSingleIter;
-  }
+  using Base::Base; // Inherit constructors from Base.
 
   void runOnOperation() override {
     getOperation().walk([&](Operation *op) {
       if (auto affineParallel = dyn_cast<AffineParallelOp>(op))
         normalizeAffineParallel(affineParallel);
       else if (auto affineFor = dyn_cast<AffineForOp>(op))
-        (void)normalizeAffineFor(affineFor, promoteSingleIter);
+        (void)normalizeAffineFor(affineFor); // Result intentionally ignored.
     });
   }
 };
 
 } // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
-mlir::affine::createAffineLoopNormalizePass(bool promoteSingleIter) {
-  return std::make_unique<AffineLoopNormalizePass>(promoteSingleIter);
+mlir::affine::createAffineLoopNormalizePass() {
+  return std::make_unique<AffineLoopNormalizePass>(); // No options to set.
 }
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -556,10 +556,7 @@ void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
   op.setUpperBounds(ranges.getOperands(), newUpperMap);
 }
 
-LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
-                                               bool promoteSingleIter) {
-  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
-    return success();
+LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op) {
 
   // Check if the forop is already normalized.
   if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&

diff --git a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
@@ -1,5 +1,4 @@
 // RUN: mlir-opt %s -affine-loop-normalize -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -affine-loop-normalize='promote-single-iter=1' -split-input-file | FileCheck %s --check-prefix=PROMOTE-SINGLE-ITER
 
 // Normalize steps to 1 and lower bounds to 0.
 
@@ -37,26 +36,6 @@ func.func @relative_bounds(%arg: index) {
 
 // -----
 
-// Check that single iteration loop is removed and its body is promoted to the
-// parent block.
-
-// CHECK-LABEL: func @promote_single_iter_loop
-// PROMOTE-SINGLE-ITER-LABEL: func @promote_single_iter_loop
-func.func @promote_single_iter_loop(%in: memref<1xf32>, %out: memref<1xf32>) {
-  affine.for %i = 0 to 1 {
-    %1 = affine.load %in[%i] : memref<1xf32>
-    affine.store %1, %out[%i] : memref<1xf32>
-  }
-  return
-}
-
-// PROMOTE-SINGLE-ITER-NEXT: arith.constant
-// PROMOTE-SINGLE-ITER-NEXT: affine.load
-// PROMOTE-SINGLE-ITER-NEXT: affine.store
-// PROMOTE-SINGLE-ITER-NEXT: return
-
-// -----
-
 // CHECK-DAG: [[$IV0:#map[0-9]*]] = affine_map<(d0) -> (d0 * 2 + 2)>
 // CHECK-DAG: [[$IV1:#map[0-9]*]] = affine_map<(d0) -> (d0 * 3)>
 

diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -1323,12 +1323,10 @@ func.func @simplify_bounds_tiled() {
       }
     }
   }
-  // CHECK:      affine.for
-  // CHECK-NEXT:   affine.for
+  // CHECK:      affine.for %{{.*}} = 0 to 2
+  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 32 step 16
   // CHECK-NEXT:     affine.for %{{.*}} = 0 to 32 step 16
-  // CHECK-NEXT:       affine.for %{{.*}} = 0 to 32 step 16
-  // CHECK-NEXT:         affine.for %{{.*}} = 0 to 2
-  // CHECK-NEXT:           affine.for %{{.*}} = 0 to 16 step 16
+  // CHECK-NEXT:       affine.for %{{.*}} = 0 to 2
 
   return
 }
@@ -1348,9 +1346,7 @@ func.func @simplify_min_max_multi_expr() {
   // CHECK: affine.for
   affine.for %i = 0 to 2 {
     // CHECK: affine.for
-    affine.for %j = 0 to 4 {
-      // The first upper bound expression will not be lower than -9. So, it's redundant.
-      // CHECK-NEXT: affine.for %{{.*}} = -10 to -9
+    affine.for %j = 0 to 4 { 
       affine.for %k = -10 to min affine_map<(d0, d1) -> (4 * d0 - 3 * d1, -9)>(%i, %j) {
         "test.foo"() : () -> ()
       }
@@ -1370,7 +1366,6 @@ func.func @simplify_min_max_multi_expr() {
     }
   }
 
-  // CHECK: affine.for %{{.*}} = 0 to 1
   affine.for %i = 0 to 2 {
     affine.for %j = max affine_map<(d0) -> (d0 floordiv 2, 0)>(%i) to 1 {
       "test.foo"() : () -> ()
@@ -2401,3 +2396,21 @@ func.func @for_empty_body_folder_iv_yield() -> index {
   }
   return %10 : index
 }
+
+// -----
+
+// Check that a single-iteration loop is removed and its body is promoted to
+// the parent block.
+
+// CHECK-LABEL: func @promote_single_iter_loop
+// CHECK-SAME:    %[[IN:.*]]: memref<1xf32>, %[[OUT:.*]]: memref<1xf32>
+func.func @promote_single_iter_loop(%in: memref<1xf32>, %out: memref<1xf32>) {
+  affine.for %i = 0 to 1 {
+    %1 = affine.load %in[%i] : memref<1xf32>
+    affine.store %1, %out[%i] : memref<1xf32>
+  }
+  return
+}
+
+// CHECK-NEXT: %[[DATA:.*]] = affine.load %[[IN]][0]
+// CHECK-NEXT: affine.store %[[DATA]], %[[OUT]][0]
diff --git a/mlir/test/Dialect/Affine/raise-memref.mlir b/mlir/test/Dialect/Affine/raise-memref.mlir
@@ -51,21 +51,17 @@ func.func @reduce_window_max() {
 // CHECK:        affine.for %[[arg0:.*]] =
 // CHECK:          affine.for %[[arg1:.*]] =
 // CHECK:            affine.for %[[arg2:.*]] =
-// CHECK:              affine.for %[[arg3:.*]] =
-// CHECK:                affine.store %[[cst]], %[[v0]][%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]]] :
+// CHECK:                affine.store %[[cst]], %[[v0]][0, %[[arg0]], %[[arg1]], %[[arg2]]] :
 // CHECK:        affine.for %[[a0:.*]] =
 // CHECK:          affine.for %[[a1:.*]] =
 // CHECK:            affine.for %[[a2:.*]] =
 // CHECK:              affine.for %[[a3:.*]] =
 // CHECK:                affine.for %[[a4:.*]] =
-// CHECK:                  affine.for %[[a5:.*]] =
-// CHECK:                    affine.for %[[a6:.*]] =
-// CHECK:                      affine.for %[[a7:.*]] =
-// CHECK:                        %[[lhs:.*]] = affine.load %[[v0]][%[[a0]], %[[a1]], %[[a2]], %[[a3]]] :
-// CHECK:                        %[[rhs:.*]] = affine.load %[[v1]][%[[a0]] + %[[a4]], %[[a1]] * 2 + %[[a5]], %[[a2]] * 2 + %[[a6]], %[[a3]] + %[[a7]]] :
-// CHECK:                        %[[res:.*]] = arith.cmpf ogt, %[[lhs]], %[[rhs]] : f32
-// CHECK:                        %[[sel:.*]] = arith.select %[[res]], %[[lhs]], %[[rhs]] : f32
-// CHECK:                        affine.store %[[sel]], %[[v0]][%[[a0]], %[[a1]], %[[a2]], %[[a3]]] :
+// CHECK:                  %[[lhs:.*]] = affine.load %[[v0]][0, %[[a0]], %[[a1]], %[[a2]]] :
+// CHECK:                  %[[rhs:.*]] = affine.load %[[v1]][0, %[[a0]] * 2 + %[[a3]], %[[a1]] * 2 + %[[a4]], %[[a2]]] :
+// CHECK:                  %[[res:.*]] = arith.cmpf ogt, %[[lhs]], %[[rhs]] : f32
+// CHECK:                  %[[sel:.*]] = arith.select %[[res]], %[[lhs]], %[[rhs]] : f32
+// CHECK:                  affine.store %[[sel]], %[[v0]][0, %[[a0]], %[[a1]], %[[a2]]] :
 
 // CHECK-LABEL:    func @symbols(
 func.func @symbols(%N : index) {