iree-org
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
Lines changed: 7 additions & 142 deletions
diff --git a/mlir/test/Conversion/ConvertToSPIRV/vector-unroll.mlir b/mlir/test/Conversion/ConvertToSPIRV/vector-unroll.mlir
Lines changed: 8 additions & 4 deletions
@@ -3286,18 +3286,6 @@ LogicalResult InsertOp::verify() {
   return success();
 }
 
-// Calculate the linearized position of the continuous chunk of elements to
-// insert, based on the shape of the value to insert and the positions to insert
-// at.
-static int64_t calculateInsertPosition(VectorType destTy,
-                                       ArrayRef<int64_t> positions) {
-  llvm::SmallVector<int64_t> completePositions(destTy.getRank(), 0);
-  assert(positions.size() <= completePositions.size() &&
-         "positions size must be less than or equal to destTy rank");
-  copy(positions, completePositions.begin());
-  return linearize(completePositions, computeStrides(destTy.getShape()));
-}
-
 namespace {
 
 // If insertOp is only inserting unit dimensions it can be transformed to a
@@ -3335,132 +3323,6 @@ class InsertSplatToSplat final : public OpRewritePattern<InsertOp> {
     return success();
   }
 };
-
-/// Pattern to optimize a chain of insertions.
-///
-/// This pattern identifies chains of vector.insert operations that:
-/// 1. Only insert values at static positions.
-/// 2. Completely initialize all elements in the resulting vector.
-/// 3. All intermediate insert operations have only one use.
-///
-/// When these conditions are met, the entire chain can be replaced with a
-/// single vector.from_elements operation.
-///
-/// To keep this pattern simple, and avoid spending too much time on matching
-/// fragmented insert chains, this pattern only considers the last insert op in
-/// the chain.
-///
-/// Example transformation:
-///   %poison = ub.poison : vector<2xi32>
-///   %0 = vector.insert %c1, %poison[0] : i32 into vector<2xi32>
-///   %1 = vector.insert %c2, %0[1] : i32 into vector<2xi32>
-/// ->
-///   %result = vector.from_elements %c1, %c2 : vector<2xi32>
-class InsertChainFullyInitialized final : public OpRewritePattern<InsertOp> {
-public:
-  using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(InsertOp op,
-                                PatternRewriter &rewriter) const override {
-
-    VectorType destTy = op.getDestVectorType();
-    if (destTy.isScalable())
-      return failure();
-    // Ensure this is the trailing vector.insert op in a chain of inserts.
-    for (Operation *user : op.getResult().getUsers())
-      if (auto insertOp = dyn_cast<InsertOp>(user))
-        if (insertOp.getDest() == op.getResult())
-          return failure();
-
-    InsertOp currentOp = op;
-    SmallVector<InsertOp> chainInsertOps;
-    while (currentOp) {
-      // Check cond 1: Dynamic position is not supported.
-      if (currentOp.hasDynamicPosition())
-        return failure();
-
-      chainInsertOps.push_back(currentOp);
-      currentOp = currentOp.getDest().getDefiningOp<InsertOp>();
-      // Check cond 3: Intermediate inserts have only one use to avoid an
-      // explosion of vectors.
-      if (currentOp && !currentOp->hasOneUse())
-        return failure();
-    }
-
-    int64_t vectorSize = destTy.getNumElements();
-    int64_t initializedCount = 0;
-    SmallVector<bool> initializedDestIdxs(vectorSize, false);
-    SmallVector<int64_t> pendingInsertPos;
-    SmallVector<int64_t> pendingInsertSize;
-    SmallVector<Value> pendingInsertValues;
-
-    for (auto insertOp : chainInsertOps) {
-      // This pattern can do nothing with poison index.
-      if (is_contained(insertOp.getStaticPosition(), InsertOp::kPoisonIndex))
-        return failure();
-
-      // Calculate the linearized position for inserting elements.
-      int64_t insertBeginPosition =
-          calculateInsertPosition(destTy, insertOp.getStaticPosition());
-
-      // The valueToStore operand may be a vector or a scalar. Need to handle
-      // both cases.
-      int64_t insertSize = 1;
-      if (auto srcVectorType =
-              llvm::dyn_cast<VectorType>(insertOp.getValueToStoreType()))
-        insertSize = srcVectorType.getNumElements();
-
-      assert(insertBeginPosition + insertSize <= vectorSize &&
-             "insert would overflow the vector");
-
-      for (auto index : llvm::seq<int64_t>(insertBeginPosition,
-                                           insertBeginPosition + insertSize)) {
-        if (initializedDestIdxs[index])
-          continue;
-        initializedDestIdxs[index] = true;
-        ++initializedCount;
-      }
-
-      // Defer the creation of ops before we can make sure the pattern can
-      // succeed.
-      pendingInsertPos.push_back(insertBeginPosition);
-      pendingInsertSize.push_back(insertSize);
-      pendingInsertValues.push_back(insertOp.getValueToStore());
-
-      if (initializedCount == vectorSize)
-        break;
-    }
-
-    // Check cond 2: all positions must be initialized.
-    if (initializedCount != vectorSize)
-      return failure();
-
-    SmallVector<Value> elements(vectorSize);
-    for (auto [insertBeginPosition, insertSize, valueToStore] :
-         llvm::reverse(llvm::zip(pendingInsertPos, pendingInsertSize,
-                                 pendingInsertValues))) {
-      auto srcVectorType = llvm::dyn_cast<VectorType>(valueToStore.getType());
-
-      if (!srcVectorType) {
-        elements[insertBeginPosition] = valueToStore;
-        continue;
-      }
-
-      SmallVector<Type> elementToInsertTypes(insertSize,
-                                             srcVectorType.getElementType());
-      // Get all elements from the vector in row-major order.
-      auto elementsToInsert = rewriter.create<vector::ToElementsOp>(
-          op.getLoc(), elementToInsertTypes, valueToStore);
-      for (int64_t linearIdx = 0; linearIdx < insertSize; linearIdx++) {
-        elements[insertBeginPosition + linearIdx] =
-            elementsToInsert.getResult(linearIdx);
-      }
-    }
-
-    rewriter.replaceOpWithNewOp<vector::FromElementsOp>(op, destTy, elements);
-    return success();
-  }
-};
-
 } // namespace
 
 static Attribute
@@ -3487,9 +3349,13 @@ foldDenseElementsAttrDestInsertOp(InsertOp insertOp, Attribute srcAttr,
       !insertOp->hasOneUse())
     return {};
 
-  // Calculate the linearized position for inserting elements.
+  // Calculate the linearized position of the continuous chunk of elements to
+  // insert.
+  llvm::SmallVector<int64_t> completePositions(destTy.getRank(), 0);
+  copy(insertOp.getStaticPosition(), completePositions.begin());
   int64_t insertBeginPosition =
-      calculateInsertPosition(destTy, insertOp.getStaticPosition());
+      linearize(completePositions, computeStrides(destTy.getShape()));
+
   SmallVector<Attribute> insertedValues;
   Type destEltType = destTy.getElementType();
 
@@ -3525,8 +3391,7 @@ static Value foldInsertUseChain(InsertOp insertOp) {
 
 void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
-  results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat,
-              InsertChainFullyInitialized>(context);
+  results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat>(context);
 }
 
 OpFoldResult InsertOp::fold(FoldAdaptor adaptor) {
 
@@ -83,16 +83,20 @@ func.func @vaddi_reduction(%arg0 : vector<8xi32>, %arg1 : vector<8xi32>) -> (i32
 // CHECK-LABEL: @transpose
 // CHECK-SAME: (%[[ARG0:.+]]: vector<3xi32>, %[[ARG1:.+]]: vector<3xi32>)
 func.func @transpose(%arg0 : vector<2x3xi32>) -> (vector<3x2xi32>) {
+  // CHECK: %[[UB:.*]] = ub.poison : vector<2xi32>
   // CHECK: %[[EXTRACT0:.*]] = vector.extract %[[ARG0]][0] : i32 from vector<3xi32>
+  // CHECK: %[[INSERT0:.*]] = vector.insert %[[EXTRACT0]], %[[UB]] [0] : i32 into vector<2xi32>
   // CHECK: %[[EXTRACT1:.*]] = vector.extract %[[ARG1]][0] : i32 from vector<3xi32>
-  // CHECK: %[[FROM_ELEMENTS0:.*]] = vector.from_elements %[[EXTRACT0]], %[[EXTRACT1]] : vector<2xi32>
+  // CHECK: %[[INSERT1:.*]] = vector.insert %[[EXTRACT1]], %[[INSERT0]] [1] : i32 into vector<2xi32>
   // CHECK: %[[EXTRACT2:.*]] = vector.extract %[[ARG0]][1] : i32 from vector<3xi32>
+  // CHECK: %[[INSERT2:.*]] = vector.insert %[[EXTRACT2]], %[[UB]] [0] : i32 into vector<2xi32>
   // CHECK: %[[EXTRACT3:.*]] = vector.extract %[[ARG1]][1] : i32 from vector<3xi32>
-  // CHECK: %[[FROM_ELEMENTS1:.*]] = vector.from_elements %[[EXTRACT2]], %[[EXTRACT3]] : vector<2xi32>
+  // CHECK: %[[INSERT3:.*]] = vector.insert %[[EXTRACT3]], %[[INSERT2]] [1] : i32 into vector<2xi32>
   // CHECK: %[[EXTRACT4:.*]] = vector.extract %[[ARG0]][2] : i32 from vector<3xi32>
+  // CHECK: %[[INSERT4:.*]] = vector.insert %[[EXTRACT4]], %[[UB]] [0] : i32 into vector<2xi32>
   // CHECK: %[[EXTRACT5:.*]] = vector.extract %[[ARG1]][2] : i32 from vector<3xi32>
-  // CHECK: %[[FROM_ELEMENTS2:.*]] = vector.from_elements %[[EXTRACT4]], %[[EXTRACT5]] : vector<2xi32>
-  // CHECK: return %[[FROM_ELEMENTS0]], %[[FROM_ELEMENTS1]], %[[FROM_ELEMENTS2]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
+  // CHECK: %[[INSERT5:.*]] = vector.insert %[[EXTRACT5]], %[[INSERT4]] [1] : i32 into vector<2xi32>
+  // CHECK: return %[[INSERT1]], %[[INSERT3]], %[[INSERT5]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
   %0 = vector.transpose %arg0, [1, 0] : vector<2x3xi32> to vector<3x2xi32>
   return %0 : vector<3x2xi32>
 }