Skip to content

Commit 57cc380

Browse files
committed
Use unroll options
1 parent 5a2070b commit 57cc380

File tree

5 files changed

+56
-115
lines changed

5 files changed

+56
-115
lines changed

mlir/include/mlir/Dialect/Vector/IR/VectorOps.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,7 +1673,9 @@ def Vector_TransferWriteOp :
16731673
let hasVerifier = 1;
16741674
}
16751675

1676-
def Vector_LoadOp : Vector_Op<"load"> {
1676+
def Vector_LoadOp : Vector_Op<"load", [
1677+
DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
1678+
]> {
16771679
let summary = "reads an n-D slice of memory into an n-D vector";
16781680
let description = [{
16791681
The 'vector.load' operation reads an n-D slice of memory into an n-D
@@ -1759,7 +1761,9 @@ def Vector_LoadOp : Vector_Op<"load"> {
17591761
"$base `[` $indices `]` attr-dict `:` type($base) `,` type($result)";
17601762
}
17611763

1762-
def Vector_StoreOp : Vector_Op<"store"> {
1764+
def Vector_StoreOp : Vector_Op<"store", [
1765+
DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
1766+
]> {
17631767
let summary = "writes an n-D vector to an n-D slice of memory";
17641768
let description = [{
17651769
The 'vector.store' operation writes an n-D vector to an n-D slice of memory.

mlir/lib/Dialect/Vector/IR/VectorOps.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5266,6 +5266,10 @@ OpFoldResult LoadOp::fold(FoldAdaptor) {
52665266
return OpFoldResult();
52675267
}
52685268

5269+
std::optional<SmallVector<int64_t, 4>> LoadOp::getShapeForUnroll() {
5270+
return llvm::to_vector<4>(getVectorType().getShape());
5271+
}
5272+
52695273
//===----------------------------------------------------------------------===//
52705274
// StoreOp
52715275
//===----------------------------------------------------------------------===//
@@ -5301,6 +5305,10 @@ LogicalResult StoreOp::fold(FoldAdaptor adaptor,
53015305
return memref::foldMemRefCast(*this);
53025306
}
53035307

5308+
std::optional<SmallVector<int64_t, 4>> StoreOp::getShapeForUnroll() {
5309+
return llvm::to_vector<4>(getVectorType().getShape());
5310+
}
5311+
53045312
//===----------------------------------------------------------------------===//
53055313
// MaskedLoadOp
53065314
//===----------------------------------------------------------------------===//

mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp

Lines changed: 25 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -653,21 +653,6 @@ struct UnrollGatherPattern : public OpRewritePattern<vector::GatherOp> {
653653
vector::UnrollVectorOptions options;
654654
};
655655

656-
// This pattern unrolls the vector load into multiple 1D vector loads by
657-
// extracting slices from the base memory and inserting them into the result
658-
// vector using vector.insert_strided_slice.
659-
// Following,
660-
// vector.load %base[%indices] : memref<4x4xf32>, vector<4x4xf32>
661-
// is converted to :
662-
// %cst = arith.constant dense<0.0> : vector<4x4xf32>
663-
// %slice_0 = vector.load %base[%indices] : memref<4x4xf32>, vector<4xf32>
664-
// %result_0 = vector.insert_strided_slice %slice_0, %cst
665-
// {offsets = [0, 0], strides = [1]} : vector<4xf32> into vector<4x4xf32>
666-
// %slice_1 = vector.load %base[%indices + 1]
667-
// : memref<4x4xf32>, vector<4xf32>
668-
// %result_1 = vector.insert_strided_slice %slice_1, %result_0
669-
// {offsets = [1, 0], strides = [1]} : vector<4xf32> into vector<4x4xf32>
670-
// ...
671656
struct UnrollLoadPattern : public OpRewritePattern<vector::LoadOp> {
672657
UnrollLoadPattern(MLIRContext *context,
673658
const vector::UnrollVectorOptions &options,
@@ -677,37 +662,37 @@ struct UnrollLoadPattern : public OpRewritePattern<vector::LoadOp> {
677662
LogicalResult matchAndRewrite(vector::LoadOp loadOp,
678663
PatternRewriter &rewriter) const override {
679664
VectorType vecType = loadOp.getVectorType();
680-
// Only unroll >1D loads
681665
if (vecType.getRank() <= 1)
682666
return failure();
683667

668+
auto targetShape = getTargetShape(options, loadOp);
669+
if (!targetShape)
670+
return failure();
671+
684672
Location loc = loadOp.getLoc();
685673
ArrayRef<int64_t> originalShape = vecType.getShape();
686-
687-
// Target type is a 1D vector of the innermost dimension.
688-
auto targetType =
689-
VectorType::get(originalShape.back(), vecType.getElementType());
690-
691-
// Extend the targetShape to the same rank of original shape by padding 1s
692-
// for leading dimensions for convenience of computing offsets
693-
SmallVector<int64_t> targetShape(originalShape.size(), 1);
694-
targetShape.back() = originalShape.back();
674+
SmallVector<int64_t> strides(targetShape->size(), 1);
695675

696676
Value result = rewriter.create<arith::ConstantOp>(
697677
loc, vecType, rewriter.getZeroAttr(vecType));
698678

699679
SmallVector<Value> originalIndices(loadOp.getIndices().begin(),
700680
loadOp.getIndices().end());
701681

682+
SmallVector<int64_t> loopOrder =
683+
getUnrollOrder(originalShape.size(), loadOp, options);
684+
685+
auto targetVecType =
686+
VectorType::get(*targetShape, vecType.getElementType());
687+
702688
for (SmallVector<int64_t> offsets :
703-
StaticTileOffsetRange(originalShape, targetShape)) {
689+
StaticTileOffsetRange(originalShape, *targetShape, loopOrder)) {
704690
SmallVector<Value> indices =
705691
computeIndices(rewriter, loc, originalIndices, offsets);
706-
Value slice = rewriter.create<vector::LoadOp>(loc, targetType,
692+
Value slice = rewriter.create<vector::LoadOp>(loc, targetVecType,
707693
loadOp.getBase(), indices);
708-
// Insert the slice into the result at the correct position.
709694
result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
710-
loc, slice, result, offsets, SmallVector<int64_t>({1}));
695+
loc, slice, result, offsets, strides);
711696
}
712697
rewriter.replaceOp(loadOp, result);
713698
return success();
@@ -717,17 +702,6 @@ struct UnrollLoadPattern : public OpRewritePattern<vector::LoadOp> {
717702
vector::UnrollVectorOptions options;
718703
};
719704

720-
// This pattern unrolls the vector store into multiple 1D vector stores by
721-
// extracting slices from the source vector and storing them into the
722-
// destination.
723-
// Following,
724-
// vector.store %source, %base[%indices] : vector<4x4xf32>
725-
// is converted to :
726-
// %slice_0 = vector.extract %source[0] : vector<4xf32>
727-
// vector.store %slice_0, %base[%indices] : vector<4xf32>
728-
// %slice_1 = vector.extract %source[1] : vector<4xf32>
729-
// vector.store %slice_1, %base[%indices + 1] : vector<4xf32>
730-
// ...
731705
struct UnrollStorePattern : public OpRewritePattern<vector::StoreOp> {
732706
UnrollStorePattern(MLIRContext *context,
733707
const vector::UnrollVectorOptions &options,
@@ -737,30 +711,32 @@ struct UnrollStorePattern : public OpRewritePattern<vector::StoreOp> {
737711
LogicalResult matchAndRewrite(vector::StoreOp storeOp,
738712
PatternRewriter &rewriter) const override {
739713
VectorType vecType = storeOp.getVectorType();
740-
// Only unroll >1D stores.
741714
if (vecType.getRank() <= 1)
742715
return failure();
743716

717+
auto targetShape = getTargetShape(options, storeOp);
718+
if (!targetShape)
719+
return failure();
720+
744721
Location loc = storeOp.getLoc();
745722
ArrayRef<int64_t> originalShape = vecType.getShape();
746-
747-
// Extend the targetShape to the same rank of original shape by padding 1s
748-
// for leading dimensions for convenience of computing offsets
749-
SmallVector<int64_t> targetShape(originalShape.size(), 1);
750-
targetShape.back() = originalShape.back();
723+
SmallVector<int64_t> strides(targetShape->size(), 1);
751724

752725
Value base = storeOp.getBase();
753726
Value vector = storeOp.getValueToStore();
754727

755728
SmallVector<Value> originalIndices(storeOp.getIndices().begin(),
756729
storeOp.getIndices().end());
757730

731+
SmallVector<int64_t> loopOrder =
732+
getUnrollOrder(originalShape.size(), storeOp, options);
733+
758734
for (SmallVector<int64_t> offsets :
759-
StaticTileOffsetRange(originalShape, targetShape)) {
735+
StaticTileOffsetRange(originalShape, *targetShape, loopOrder)) {
760736
SmallVector<Value> indices =
761737
computeIndices(rewriter, loc, originalIndices, offsets);
762-
offsets.pop_back();
763-
Value slice = rewriter.create<vector::ExtractOp>(loc, vector, offsets);
738+
Value slice = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
739+
loc, vector, offsets, *targetShape, strides);
764740
rewriter.create<vector::StoreOp>(loc, slice, base, indices);
765741
}
766742
rewriter.eraseOp(storeOp);

mlir/test/Dialect/Vector/vector-unroll-options.mlir

Lines changed: 16 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -388,19 +388,17 @@ func.func @vector_load_2D(%mem: memref<4x4xf16>) -> vector<4x4xf16> {
388388

389389
// CHECK-LABEL: func.func @vector_load_2D(
390390
// CHECK-SAME: %[[ARG:.*]]: memref<4x4xf16>) -> vector<4x4xf16> {
391-
// CHECK: %[[C3:.*]] = arith.constant 3 : index
392391
// CHECK: %[[C2:.*]] = arith.constant 2 : index
393-
// CHECK: %[[C1:.*]] = arith.constant 1 : index
394392
// CHECK: %[[C0:.*]] = arith.constant 0 : index
395393
// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<4x4xf16>
396-
// CHECK: %[[V0:.*]] = vector.load %[[ARG]][%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
397-
// CHECK: %[[V1:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
398-
// CHECK: %[[V2:.*]] = vector.load %[[ARG]][%[[C1]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
399-
// CHECK: %[[V3:.*]] = vector.insert_strided_slice %[[V2]], %[[V1]] {offsets = [1, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
400-
// CHECK: %[[V4:.*]] = vector.load %[[ARG]][%[[C2]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
401-
// CHECK: %[[V5:.*]] = vector.insert_strided_slice %[[V4]], %[[V3]] {offsets = [2, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
402-
// CHECK: %[[V6:.*]] = vector.load %[[ARG]][%[[C3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
403-
// CHECK: %[[V7:.*]] = vector.insert_strided_slice %[[V6]], %[[V5]] {offsets = [3, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
394+
// CHECK: %[[V0:.*]] = vector.load %[[ARG]][%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<2x2xf16>
395+
// CHECK: %[[V1:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf16> into vector<4x4xf16>
396+
// CHECK: %[[V2:.*]] = vector.load %[[ARG]][%[[C0]], %[[C2]]] : memref<4x4xf16>, vector<2x2xf16>
397+
// CHECK: %[[V3:.*]] = vector.insert_strided_slice %[[V2]], %[[V1]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf16> into vector<4x4xf16>
398+
// CHECK: %[[V4:.*]] = vector.load %[[ARG]][%[[C2]], %[[C0]]] : memref<4x4xf16>, vector<2x2xf16>
399+
// CHECK: %[[V5:.*]] = vector.insert_strided_slice %[[V4]], %[[V3]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf16> into vector<4x4xf16>
400+
// CHECK: %[[V6:.*]] = vector.load %[[ARG]][%[[C2]], %[[C2]]] : memref<4x4xf16>, vector<2x2xf16>
401+
// CHECK: %[[V7:.*]] = vector.insert_strided_slice %[[V6]], %[[V5]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf16> into vector<4x4xf16>
404402
// CHECK: return %[[V7]] : vector<4x4xf16>
405403

406404

@@ -412,48 +410,13 @@ func.func @vector_store_2D(%mem: memref<4x4xf16>, %v: vector<4x4xf16>) {
412410

413411
// CHECK-LABEL: func.func @vector_store_2D(
414412
// CHECK-SAME: %[[ARG0:.*]]: memref<4x4xf16>, %[[ARG1:.*]]: vector<4x4xf16>) {
415-
// CHECK: %[[C3:.*]] = arith.constant 3 : index
416413
// CHECK: %[[C2:.*]] = arith.constant 2 : index
417-
// CHECK: %[[C1:.*]] = arith.constant 1 : index
418414
// CHECK: %[[C0:.*]] = arith.constant 0 : index
419-
// CHECK: %[[V0:.*]] = vector.extract %[[ARG1]][0] : vector<4xf16> from vector<4x4xf16>
420-
// CHECK: vector.store %[[V0]], %[[ARG0]][%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
421-
// CHECK: %[[V1:.*]] = vector.extract %[[ARG1]][1] : vector<4xf16> from vector<4x4xf16>
422-
// CHECK: vector.store %[[V1]], %[[ARG0]][%[[C1]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
423-
// CHECK: %[[V2:.*]] = vector.extract %[[ARG1]][2] : vector<4xf16> from vector<4x4xf16>
424-
// CHECK: vector.store %[[V2]], %[[ARG0]][%[[C2]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
425-
// CHECK: %[[V3:.*]] = vector.extract %[[ARG1]][3] : vector<4xf16> from vector<4x4xf16>
426-
// CHECK: vector.store %[[V3]], %[[ARG0]][%[[C3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
427-
428-
429-
func.func @vector_load_4D_to_2D(%mem: memref<4x4x4x4xf16>) -> vector<2x2xf16> {
430-
%c1 = arith.constant 1 : index
431-
%0 = vector.load %mem[%c1, %c1, %c1, %c1] : memref<4x4x4x4xf16>, vector<2x2xf16>
432-
return %0 : vector<2x2xf16>
433-
}
434-
435-
// CHECK-LABEL: func.func @vector_load_4D_to_2D(
436-
// CHECK-SAME: %[[ARG:.*]]: memref<4x4x4x4xf16>) -> vector<2x2xf16> {
437-
// CHECK: %[[C2:.*]] = arith.constant 2 : index
438-
// CHECK: %[[C1:.*]] = arith.constant 1 : index
439-
// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
440-
// CHECK: %[[V0:.*]] = vector.load %[[ARG]][%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
441-
// CHECK: %[[V1:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1]} : vector<2xf16> into vector<2x2xf16>
442-
// CHECK: %[[V2:.*]] = vector.load %[[ARG]][%[[C1]], %[[C1]], %[[C2]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
443-
// CHECK: %[[V3:.*]] = vector.insert_strided_slice %[[V2]], %[[V1]] {offsets = [1, 0], strides = [1]} : vector<2xf16> into vector<2x2xf16>
444-
// CHECK: return %[[V3]] : vector<2x2xf16>
445-
446-
func.func @vector_store_2D_to_4D(%mem: memref<4x4x4x4xf16>, %v: vector<2x2xf16>) {
447-
%c1 = arith.constant 1 : index
448-
vector.store %v, %mem[%c1, %c1, %c1, %c1] : memref<4x4x4x4xf16>, vector<2x2xf16>
449-
return
450-
}
451-
452-
// CHECK-LABEL: func.func @vector_store_2D_to_4D(
453-
// CHECK-SAME: %[[ARG0:.*]]: memref<4x4x4x4xf16>, %[[ARG1:.*]]: vector<2x2xf16>) {
454-
// CHECK: %[[C2:.*]] = arith.constant 2 : index
455-
// CHECK: %[[C1:.*]] = arith.constant 1 : index
456-
// CHECK: %[[V0:.*]] = vector.extract %[[ARG1]][0] : vector<2xf16> from vector<2x2xf16>
457-
// CHECK: vector.store %[[V0]], %[[ARG0]][%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
458-
// CHECK: %[[V1:.*]] = vector.extract %[[ARG1]][1] : vector<2xf16> from vector<2x2xf16>
459-
// CHECK: vector.store %[[V1]], %[[ARG0]][%[[C1]], %[[C1]], %[[C2]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
415+
// CHECK: %[[V0:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf16> to vector<2x2xf16>
416+
// CHECK: vector.store %[[V0]], %[[ARG0]][%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<2x2xf16>
417+
// CHECK: %[[V1:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf16> to vector<2x2xf16>
418+
// CHECK: vector.store %[[V1]], %[[ARG0]][%[[C0]], %[[C2]]] : memref<4x4xf16>, vector<2x2xf16>
419+
// CHECK: %[[V2:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf16> to vector<2x2xf16>
420+
// CHECK: vector.store %[[V2]], %[[ARG0]][%[[C2]], %[[C0]]] : memref<4x4xf16>, vector<2x2xf16>
421+
// CHECK: %[[V3:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf16> to vector<2x2xf16>
422+
// CHECK: vector.store %[[V3]], %[[ARG0]][%[[C2]], %[[C2]]] : memref<4x4xf16>, vector<2x2xf16>

mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ struct TestVectorUnrollingPatterns
163163
.setFilterConstraint([](Operation *op) {
164164
return success(
165165
isa<arith::AddFOp, vector::FMAOp, vector::MultiDimReductionOp,
166-
vector::BroadcastOp>(op));
166+
vector::BroadcastOp, vector::LoadOp, vector::StoreOp>(op));
167167
}));
168168
populateVectorUnrollPatterns(
169169
patterns, UnrollVectorOptions()
@@ -178,16 +178,6 @@ struct TestVectorUnrollingPatterns
178178
return success(isa<vector::TransposeOp>(op));
179179
}));
180180

181-
populateVectorUnrollPatterns(
182-
patterns, UnrollVectorOptions()
183-
.setNativeShape(ArrayRef<int64_t>{2, 2})
184-
.setFilterConstraint([](Operation *op) {
185-
if (auto loadOp = dyn_cast<vector::LoadOp>(op))
186-
return success(loadOp.getType().getRank() > 1);
187-
if (auto storeOp = dyn_cast<vector::StoreOp>(op))
188-
return success(storeOp.getVectorType().getRank() > 1);
189-
return failure();
190-
}));
191181
if (unrollBasedOnType) {
192182
UnrollVectorOptions::NativeShapeFnType nativeShapeFn =
193183
[](Operation *op) -> std::optional<SmallVector<int64_t>> {

0 commit comments

Comments (0)