From 84157c03453f2c23faa086541a0cc3421ae7711a Mon Sep 17 00:00:00 2001
From: Prakhar Dixit <dixitprakhar11@gmail.com>
Date: Sun, 20 Apr 2025 12:01:42 +0530
Subject: [PATCH 1/6] [mlir][affine] Modify assertion into a user visible
 diagnostic

---
 mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp | 10 ++++--
 .../SuperVectorize/vectorize_unsupported.mlir | 35 +++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index 024e8ccb901de..f15ed4b0d5f84 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -141,8 +141,14 @@ static AffineMap makePermutationMap(
     unsigned countInvariantIndices = 0;
     for (unsigned dim = 0; dim < numIndices; ++dim) {
       if (!invariants.count(indices[dim])) {
-        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
-               "permutationMap already has an entry along dim");
+        if (perm[kvp.second] != getAffineConstantExpr(0, context)) {
+          auto loopOp = cast<affine::AffineForOp>(kvp.first);
+          loopOp->emitError(
+              "loop induction variable is used in multiple indices, which is "
+              "unsupported for vectorization. Consider using nested loops "
+              "instead of a single loop with affine.apply.");
+          return AffineMap();
+        }
         perm[kvp.second] = getAffineDimExpr(dim, context);
       } else {
         ++countInvariantIndices;
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
index 6c1a7c48c4cb1..c8c009f02212b 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
@@ -9,3 +9,38 @@ func.func @unparallel_loop_reduction_unsupported(%in: memref<256x512xf32>, %out:
  }
  return
 }
+
+// -----
+
+#map = affine_map<(d0)[s0] -> (d0 mod s0)>
+#map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+
+func.func @single_loop_unrolling_2D_access_pattern(%arg0: index) -> memref<2x2xf32> {
+  %c2 = arith.constant 2 : index
+  %cst = arith.constant 1.0 : f32
+  %alloc = memref.alloc() : memref<2x2xf32>
+    
+    affine.for %i = 0 to 4 {
+      %row = affine.apply #map1(%i)[%c2]  
+      %col = affine.apply #map(%i)[%c2]  
+      affine.store %cst, %alloc[%row, %col] : memref<2x2xf32>
+    }
+    
+    return %alloc : memref<2x2xf32>
+  }
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+
+// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern(
+// CHECK-SAME:                            %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[VAL_3:.*]] = memref.alloc() : memref<2x2xf32>
+// CHECK:           affine.for %[[VAL_4:.*]] = 0 to 4 {
+// CHECK:             %[[VAL_5:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
+// CHECK:             %[[VAL_6:.*]] = affine.apply #[[$ATTR_1]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
+// CHECK:             affine.store %[[VAL_2]], %[[VAL_3]]{{\[}}%[[VAL_5]], %[[VAL_6]]] : memref<2x2xf32>
+// CHECK:           }
+// CHECK:           return %[[VAL_3]] : memref<2x2xf32>
+// CHECK:         }
\ No newline at end of file

From 8112d1a04b248b4bd6098ad3d9e70ef06232868d Mon Sep 17 00:00:00 2001
From: Prakhar Dixit <dixitprakhar11@gmail.com>
Date: Tue, 22 Apr 2025 13:09:52 +0530
Subject: [PATCH 2/6] Add check to ensure pass fails gracefully

---
 .../Affine/Transforms/SuperVectorize.cpp      | 30 +++++++++++++++
 mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp | 10 +----
 .../SuperVectorize/vectorize_unsupported.mlir | 37 ++++++++++++++++++-
 3 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index eaaafaf68767e..cf1341084021b 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -1217,6 +1217,21 @@ static Operation *vectorizeAffineLoad(AffineLoadOp loadOp,
     indices.append(mapOperands.begin(), mapOperands.end());
   }
 
+  for (auto &kvp : state.vecLoopToVecDim) {
+    AffineForOp forOp = cast<AffineForOp>(kvp.first);
+    auto invariants =
+        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
+    unsigned nonInvariant = 0;
+    for (Value idx : indices)
+      if (!invariants.count(idx))
+        ++nonInvariant;
+    if (nonInvariant > 1) {
+      LLVM_DEBUG(dbgs() << "\n[early-vect] Bail out: loop IV "
+                        << forOp.getInductionVar() << " drives " << nonInvariant
+                        << " indices (must be ≤1)\n");
+      return nullptr;
+    }
+  }
   // Compute permutation map using the information of new vector loops.
   auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                            indices, state.vecLoopToVecDim);
@@ -1262,6 +1277,21 @@ static Operation *vectorizeAffineStore(AffineStoreOp storeOp,
   else
     indices.append(mapOperands.begin(), mapOperands.end());
 
+  for (auto &kvp : state.vecLoopToVecDim) {
+    AffineForOp forOp = cast<AffineForOp>(kvp.first);
+    auto invariants =
+        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
+    unsigned nonInvariant = 0;
+    for (Value idx : indices)
+      if (!invariants.count(idx))
+        ++nonInvariant;
+    if (nonInvariant > 1) {
+      LLVM_DEBUG(dbgs() << "\n[early-vect] Bail out: loop IV "
+                        << forOp.getInductionVar() << " drives " << nonInvariant
+                        << " indices (must be ≤1)\n");
+      return nullptr;
+    }
+  }
   // Compute permutation map using the information of new vector loops.
   auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                            indices, state.vecLoopToVecDim);
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index f15ed4b0d5f84..024e8ccb901de 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -141,14 +141,8 @@ static AffineMap makePermutationMap(
     unsigned countInvariantIndices = 0;
     for (unsigned dim = 0; dim < numIndices; ++dim) {
       if (!invariants.count(indices[dim])) {
-        if (perm[kvp.second] != getAffineConstantExpr(0, context)) {
-          auto loopOp = cast<affine::AffineForOp>(kvp.first);
-          loopOp->emitError(
-              "loop induction variable is used in multiple indices, which is "
-              "unsupported for vectorization. Consider using nested loops "
-              "instead of a single loop with affine.apply.");
-          return AffineMap();
-        }
+        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
+               "permutationMap already has an entry along dim");
         perm[kvp.second] = getAffineDimExpr(dim, context);
       } else {
         ++countInvariantIndices;
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
index c8c009f02212b..51b7020f30943 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
@@ -15,7 +15,7 @@ func.func @unparallel_loop_reduction_unsupported(%in: memref<256x512xf32>, %out:
 #map = affine_map<(d0)[s0] -> (d0 mod s0)>
 #map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
 
-func.func @single_loop_unrolling_2D_access_pattern(%arg0: index) -> memref<2x2xf32> {
+func.func @single_loop_unrolling_2D_access_pattern_storeOp(%arg0: index) -> memref<2x2xf32> {
   %c2 = arith.constant 2 : index
   %cst = arith.constant 1.0 : f32
   %alloc = memref.alloc() : memref<2x2xf32>
@@ -33,7 +33,7 @@ func.func @single_loop_unrolling_2D_access_pattern(%arg0: index) -> memref<2x2xf
 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
 
 // CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern(
-// CHECK-SAME:                            %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
+// CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
 // CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
 // CHECK:           %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK:           %[[VAL_3:.*]] = memref.alloc() : memref<2x2xf32>
@@ -43,4 +43,37 @@ func.func @single_loop_unrolling_2D_access_pattern(%arg0: index) -> memref<2x2xf
 // CHECK:             affine.store %[[VAL_2]], %[[VAL_3]]{{\[}}%[[VAL_5]], %[[VAL_6]]] : memref<2x2xf32>
 // CHECK:           }
 // CHECK:           return %[[VAL_3]] : memref<2x2xf32>
+// CHECK:         }
+
+// -----
+
+#map = affine_map<(d0)[s0] -> (d0 mod s0)>
+#map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+
+func.func @single_loop_unrolling_2D_access_pattern_loadOp(%arg0: index) -> memref<2x2xf32> {
+  %c2 = arith.constant 2 : index
+  %alloc = memref.alloc() : memref<2x2xf32>
+
+  affine.for %i = 0 to 4 {
+    %row = affine.apply #map1(%i)[%c2]  
+    %col = affine.apply #map(%i)[%c2]  
+    %val = affine.load %alloc[%row, %col] : memref<2x2xf32>
+  }
+
+  return %alloc : memref<2x2xf32>
+}
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+
+// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern(
+// CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_2:.*]] = memref.alloc() : memref<2x2xf32>
+// CHECK:           affine.for %[[VAL_3:.*]] = 0 to 4 {
+// CHECK:             %[[VAL_4:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_3]]){{\[}}%[[VAL_1]]]
+// CHECK:             %[[VAL_5:.*]] = affine.apply #[[$ATTR_1]](%[[VAL_3]]){{\[}}%[[VAL_1]]]
+// CHECK:             %[[VAL_6:.*]] = affine.load %[[VAL_2]]{{\[}}%[[VAL_4]], %[[VAL_5]]] : memref<2x2xf32>
+// CHECK:           }
+// CHECK:           return %[[VAL_2]] : memref<2x2xf32>
 // CHECK:         }
\ No newline at end of file

From 24acbf905f9810c8a6ce2c97c19662669f40123d Mon Sep 17 00:00:00 2001
From: Prakhar Dixit <dixitprakhar11@gmail.com>
Date: Tue, 22 Apr 2025 13:18:59 +0530
Subject: [PATCH 3/6] Fix CHECK-LABEL function names in test

---
 .../Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
index 51b7020f30943..43f2768cd2874 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
@@ -32,7 +32,7 @@ func.func @single_loop_unrolling_2D_access_pattern_storeOp(%arg0: index) -> memr
 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
 
-// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern(
+// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern_storeOp(
 // CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
 // CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
 // CHECK:           %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
@@ -66,7 +66,7 @@ func.func @single_loop_unrolling_2D_access_pattern_loadOp(%arg0: index) -> memre
 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
 
-// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern(
+// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern_loadOp(
 // CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
 // CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
 // CHECK:           %[[VAL_2:.*]] = memref.alloc() : memref<2x2xf32>

From 99f68859f930169151fde0017f993cd29c3c945c Mon Sep 17 00:00:00 2001
From: Prakhar Dixit <dixitprakhar11@gmail.com>
Date: Tue, 22 Apr 2025 19:42:06 +0530
Subject: [PATCH 4/6] Create a helper function to avoid code duplication

---
 .../Affine/Transforms/SuperVectorize.cpp      | 61 ++++++++++---------
 .../SuperVectorize/vectorize_unsupported.mlir | 41 +------------
 2 files changed, 33 insertions(+), 69 deletions(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index cf1341084021b..79bf2f4b6c1d3 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -1185,6 +1185,31 @@ static Value vectorizeOperand(Value operand, VectorizationState &state) {
   return nullptr;
 }
 
+/// Returns true if any vectorized loop IV drives more than one index.
+static bool isIVMappedToMultipleIndices(
+    ArrayRef<Value> indices,
+    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
+  for (auto &kvp : loopToVectorDim) {
+    AffineForOp forOp = cast<AffineForOp>(kvp.first);
+    // Find which indices are invariant w.r.t. this loop IV.
+    auto invariants =
+        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
+    // Count how many vary (i.e. are not invariant).
+    unsigned nonInvariant = 0;
+    for (Value idx : indices)
+      if (!invariants.count(idx))
+        ++nonInvariant;
+    // Bail if more than one index varies for this single loop IV.
+    if (nonInvariant > 1) {
+      LLVM_DEBUG(dbgs() << "[early‑vect] Bail out: IV "
+                        << forOp.getInductionVar() << " drives " << nonInvariant
+                        << " indices\n");
+      return true;
+    }
+  }
+  return false;
+}
+
 /// Vectorizes an affine load with the vectorization strategy in 'state' by
 /// generating a 'vector.transfer_read' op with the proper permutation map
 /// inferred from the indices of the load. The new 'vector.transfer_read' is
@@ -1217,21 +1242,9 @@ static Operation *vectorizeAffineLoad(AffineLoadOp loadOp,
     indices.append(mapOperands.begin(), mapOperands.end());
   }
 
-  for (auto &kvp : state.vecLoopToVecDim) {
-    AffineForOp forOp = cast<AffineForOp>(kvp.first);
-    auto invariants =
-        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
-    unsigned nonInvariant = 0;
-    for (Value idx : indices)
-      if (!invariants.count(idx))
-        ++nonInvariant;
-    if (nonInvariant > 1) {
-      LLVM_DEBUG(dbgs() << "\n[early-vect] Bail out: loop IV "
-                        << forOp.getInductionVar() << " drives " << nonInvariant
-                        << " indices (must be ≤1)\n");
-      return nullptr;
-    }
-  }
+  if (isIVMappedToMultipleIndices(indices, state.vecLoopToVecDim))
+    return nullptr;
+
   // Compute permutation map using the information of new vector loops.
   auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                            indices, state.vecLoopToVecDim);
@@ -1277,21 +1290,9 @@ static Operation *vectorizeAffineStore(AffineStoreOp storeOp,
   else
     indices.append(mapOperands.begin(), mapOperands.end());
 
-  for (auto &kvp : state.vecLoopToVecDim) {
-    AffineForOp forOp = cast<AffineForOp>(kvp.first);
-    auto invariants =
-        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
-    unsigned nonInvariant = 0;
-    for (Value idx : indices)
-      if (!invariants.count(idx))
-        ++nonInvariant;
-    if (nonInvariant > 1) {
-      LLVM_DEBUG(dbgs() << "\n[early-vect] Bail out: loop IV "
-                        << forOp.getInductionVar() << " drives " << nonInvariant
-                        << " indices (must be ≤1)\n");
-      return nullptr;
-    }
-  }
+  if (isIVMappedToMultipleIndices(indices, state.vecLoopToVecDim))
+    return nullptr;
+
   // Compute permutation map using the information of new vector loops.
   auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                            indices, state.vecLoopToVecDim);
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
index 43f2768cd2874..fcb5289efd13f 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
@@ -15,7 +15,7 @@ func.func @unparallel_loop_reduction_unsupported(%in: memref<256x512xf32>, %out:
 #map = affine_map<(d0)[s0] -> (d0 mod s0)>
 #map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
 
-func.func @single_loop_unrolling_2D_access_pattern_storeOp(%arg0: index) -> memref<2x2xf32> {
+func.func @iv_mapped_to_multiple_indices_unsupported(%arg0: index) -> memref<2x2xf32> {
   %c2 = arith.constant 2 : index
   %cst = arith.constant 1.0 : f32
   %alloc = memref.alloc() : memref<2x2xf32>
@@ -32,48 +32,11 @@ func.func @single_loop_unrolling_2D_access_pattern_storeOp(%arg0: index) -> memr
 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
 
-// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern_storeOp(
+// CHECK-LABEL:   func.func @iv_mapped_to_multiple_indices_unsupported(
 // CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
 // CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
-// CHECK:           %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
-// CHECK:           %[[VAL_3:.*]] = memref.alloc() : memref<2x2xf32>
 // CHECK:           affine.for %[[VAL_4:.*]] = 0 to 4 {
 // CHECK:             %[[VAL_5:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
 // CHECK:             %[[VAL_6:.*]] = affine.apply #[[$ATTR_1]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
-// CHECK:             affine.store %[[VAL_2]], %[[VAL_3]]{{\[}}%[[VAL_5]], %[[VAL_6]]] : memref<2x2xf32>
 // CHECK:           }
-// CHECK:           return %[[VAL_3]] : memref<2x2xf32>
-// CHECK:         }
-
-// -----
-
-#map = affine_map<(d0)[s0] -> (d0 mod s0)>
-#map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
-
-func.func @single_loop_unrolling_2D_access_pattern_loadOp(%arg0: index) -> memref<2x2xf32> {
-  %c2 = arith.constant 2 : index
-  %alloc = memref.alloc() : memref<2x2xf32>
-
-  affine.for %i = 0 to 4 {
-    %row = affine.apply #map1(%i)[%c2]  
-    %col = affine.apply #map(%i)[%c2]  
-    %val = affine.load %alloc[%row, %col] : memref<2x2xf32>
-  }
-
-  return %alloc : memref<2x2xf32>
-}
-
-// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
-// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-
-// CHECK-LABEL:   func.func @single_loop_unrolling_2D_access_pattern_loadOp(
-// CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
-// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
-// CHECK:           %[[VAL_2:.*]] = memref.alloc() : memref<2x2xf32>
-// CHECK:           affine.for %[[VAL_3:.*]] = 0 to 4 {
-// CHECK:             %[[VAL_4:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_3]]){{\[}}%[[VAL_1]]]
-// CHECK:             %[[VAL_5:.*]] = affine.apply #[[$ATTR_1]](%[[VAL_3]]){{\[}}%[[VAL_1]]]
-// CHECK:             %[[VAL_6:.*]] = affine.load %[[VAL_2]]{{\[}}%[[VAL_4]], %[[VAL_5]]] : memref<2x2xf32>
-// CHECK:           }
-// CHECK:           return %[[VAL_2]] : memref<2x2xf32>
 // CHECK:         }
\ No newline at end of file

From 2052b9da9a0fbbed48e6fb396934e546fee4ddb4 Mon Sep 17 00:00:00 2001
From: Prakhar Dixit <dixitprakhar11@gmail.com>
Date: Wed, 30 Apr 2025 11:10:19 +0530
Subject: [PATCH 5/6] Spell out DenseSet type and exit IV check early

---
 .../Affine/Transforms/SuperVectorize.cpp      | 22 ++++++++++---------
 .../SuperVectorize/vectorize_unsupported.mlir |  4 ++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index 79bf2f4b6c1d3..e25bcad41a3a9 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -24,6 +24,7 @@
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
 #include "mlir/IR/IRMapping.h"
+#include "mlir/IR/Value.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
@@ -1192,19 +1193,20 @@ static bool isIVMappedToMultipleIndices(
   for (auto &kvp : loopToVectorDim) {
     AffineForOp forOp = cast<AffineForOp>(kvp.first);
     // Find which indices are invariant w.r.t. this loop IV.
-    auto invariants =
+    llvm::DenseSet<Value> invariants =
         affine::getInvariantAccesses(forOp.getInductionVar(), indices);
     // Count how many vary (i.e. are not invariant).
     unsigned nonInvariant = 0;
-    for (Value idx : indices)
-      if (!invariants.count(idx))
-        ++nonInvariant;
-    // Bail if more than one index varies for this single loop IV.
-    if (nonInvariant > 1) {
-      LLVM_DEBUG(dbgs() << "[early‑vect] Bail out: IV "
-                        << forOp.getInductionVar() << " drives " << nonInvariant
-                        << " indices\n");
-      return true;
+    for (Value idx : indices) {
+      if (invariants.count(idx))
+        continue;
+
+      if (++nonInvariant > 1) {
+        LLVM_DEBUG(dbgs() << "[early-vect] Bail out: IV "
+                          << forOp.getInductionVar() << " drives "
+                          << nonInvariant << " indices\n");
+        return true;
+      }
     }
   }
   return false;
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
index fcb5289efd13f..4f59b7812a668 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
@@ -33,10 +33,10 @@ func.func @iv_mapped_to_multiple_indices_unsupported(%arg0: index) -> memref<2x2
 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
 
 // CHECK-LABEL:   func.func @iv_mapped_to_multiple_indices_unsupported(
-// CHECK-SAME:      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index) -> memref<2x2xf32> {
+// CHECK-SAME:      %[[VAL_0:.*]]: index) -> memref<2x2xf32> {
 // CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
 // CHECK:           affine.for %[[VAL_4:.*]] = 0 to 4 {
 // CHECK:             %[[VAL_5:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
 // CHECK:             %[[VAL_6:.*]] = affine.apply #[[$ATTR_1]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
 // CHECK:           }
-// CHECK:         }
\ No newline at end of file
+// CHECK:         }

From 827b352742f5ced25a9502387271d75056dd175d Mon Sep 17 00:00:00 2001
From: Prakhar Dixit <dixitprakhar11@gmail.com>
Date: Wed, 30 Apr 2025 11:19:59 +0530
Subject: [PATCH 6/6] Remove unneeded mlir/IR/Value.h include

---
 mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index e25bcad41a3a9..f6f192a6d964a 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -24,7 +24,6 @@
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
 #include "mlir/IR/IRMapping.h"
-#include "mlir/IR/Value.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"