llvm · harrisonGPU · Nov 5, 2024 · Nov 5, 2024 · Nov 5, 2024 · Nov 5, 2024
@@ -1910,8 +1910,6 @@ void mlir::populateVectorToLLVMConversionPatterns(
                MaskedReductionOpConversion, VectorInterleaveOpLowering,
                VectorDeinterleaveOpLowering, VectorFromElementsLowering,
                VectorScalableStepOpLowering>(converter);
-  // Transfer ops with rank > 1 are handled by VectorToSCF.
-  populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
 }
 
 void mlir::populateVectorToLLVMMatrixConversionPatterns(

@@ -74,8 +74,6 @@ void ConvertVectorToLLVMPass::runOnOperation() {
     populateVectorInterleaveLoweringPatterns(patterns);
     populateVectorTransposeLoweringPatterns(patterns,
                                             VectorTransformsOptions());
-    // Vector transfer ops with rank > 1 should be lowered with VectorToSCF.
-    populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
     (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
   }
 
@@ -84,7 +82,6 @@ void ConvertVectorToLLVMPass::runOnOperation() {
   LLVMTypeConverter converter(&getContext(), options);
   RewritePatternSet patterns(&getContext());
   populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices);
-  populateVectorTransferLoweringPatterns(patterns);
   populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
   populateVectorToLLVMConversionPatterns(
       converter, patterns, reassociateFPReductions, force32BitVectorIndices);

@@ -97,9 +97,6 @@ struct TransferReadPermutationLowering
   matchAndRewriteMaskableOp(vector::TransferReadOp op,
                             MaskingOpInterface maskOp,
                             PatternRewriter &rewriter) const override {
-    // TODO: support 0-d corner case.
-    if (op.getTransferRank() == 0)
-      return rewriter.notifyMatchFailure(op, "0-d corner case not supported");
     // TODO: Support transfer_read inside MaskOp case.
     if (maskOp)
       return rewriter.notifyMatchFailure(op, "Masked case not supported");
@@ -326,9 +323,6 @@ struct TransferOpReduceRank
   matchAndRewriteMaskableOp(vector::TransferReadOp op,
                             MaskingOpInterface maskOp,
                             PatternRewriter &rewriter) const override {
-    // TODO: support 0-d corner case.
-    if (op.getTransferRank() == 0)
-      return rewriter.notifyMatchFailure(op, "0-d corner case not supported");
     // TODO: support masked case.
     if (maskOp)
       return rewriter.notifyMatchFailure(op, "Masked case not supported");
@@ -642,10 +636,10 @@ struct TransferWriteToVectorStoreLowering
 void mlir::vector::populateVectorTransferLoweringPatterns(
     RewritePatternSet &patterns, std::optional<unsigned> maxTransferRank,
     PatternBenefit benefit) {
-  patterns.add<TransferReadToVectorLoadLowering,
-               TransferWriteToVectorStoreLowering>(patterns.getContext(),
-                                                   maxTransferRank, benefit);
-  patterns
-      .add<VectorLoadToMemrefLoadLowering, VectorStoreToMemrefStoreLowering>(
-          patterns.getContext(), benefit);
+  // NOTE(review): after this change the body is empty: nothing is added to
+  // `patterns`, and `maxTransferRank` and `benefit` are unused. The removed
+  // registrations were TransferReadToVectorLoadLowering,
+  // TransferWriteToVectorStoreLowering, VectorLoadToMemrefLoadLowering and
+  // VectorStoreToMemrefStoreLowering.
+  // TODO: restore them or delete this function before merging -- confirm.
 }
@@ -3,10 +3,7 @@
   func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: index, %arg3: vector<1xf32>) {
     %c0 = arith.constant 0 : index
     vector.warp_execute_on_lane_0(%arg0)[32] {
-      // CHECK:%[[val:[0-9]+]] = llvm.extractelement
-      // CHECK:%[[base:[0-9]+]] = llvm.extractvalue
-      // CHECK:%[[ptr:[0-9]+]] = llvm.getelementptr %[[base]]
-      // CHECK:llvm.store %[[val]], %[[ptr]]
+      // CHECK: vector.transfer_write %arg9, %[[MEM:.*]][%[[IDX:.*]], %[[IDX]]] {in_bounds = [true]} : vector<1xf32>, memref<1024x1024xf32>
       vector.transfer_write %arg3, %arg1[%c0, %c0] {in_bounds = [true]} : vector<1xf32>, memref<1024x1024xf32>
     }
     return

@@ -2953,12 +2953,16 @@ func.func @vector_load_op_0d(%memref : memref<200x100xf32>, %i : index, %j : ind
 }
 
 // CHECK-LABEL: func @vector_load_op_0d
-// CHECK: %[[load:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}]
-// CHECK: %[[vec:.*]] = llvm.mlir.undef : vector<1xf32>
-// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK: %[[inserted:.*]] = llvm.insertelement %[[load]], %[[vec]][%[[c0]] : i32] : vector<1xf32>
-// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[inserted]] : vector<1xf32> to vector<f32>
-// CHECK: return %[[cast]] : vector<f32>
+// CHECK: %[[S0:.*]] = builtin.unrealized_conversion_cast %arg2 : index to i64
+// CHECK: %[[S1:.*]] = builtin.unrealized_conversion_cast %arg1 : index to i64
+// CHECK: %[[S2:.*]] = builtin.unrealized_conversion_cast %arg0 : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S3:.*]] = llvm.extractvalue %[[S2]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S4:.*]] = llvm.mlir.constant(100 : index) : i64
+// CHECK: %[[S5:.*]] = llvm.mul %[[S1]], %[[S4]] : i64
+// CHECK: %[[S6:.*]] = llvm.add %[[S5]], %[[S0]] : i64
+// CHECK: %[[S7:.*]] = llvm.getelementptr %[[S3]][%[[S6]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+// CHECK: %[[S8:.*]] = llvm.load %[[S7]] {alignment = 4 : i64} : !llvm.ptr -> vector<1xf32>
+// CHECK: %[[S9:.*]] = builtin.unrealized_conversion_cast %[[S8]] : vector<1xf32> to vector<f32>
 
 // -----
 
@@ -2969,11 +2973,17 @@ func.func @vector_store_op_0d(%memref : memref<200x100xf32>, %i : index, %j : in
 }
 
 // CHECK-LABEL: func @vector_store_op_0d
-// CHECK: %[[val:.*]] = arith.constant dense<1.100000e+01> : vector<f32>
-// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[val]] : vector<f32> to vector<1xf32>
-// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[extracted:.*]] = llvm.extractelement %[[cast]][%[[c0]] : i64] : vector<1xf32>
-// CHECK: memref.store %[[extracted]], %{{.*}}[%{{.*}}, %{{.*}}]
+// CHECK: %[[S0:.*]] = builtin.unrealized_conversion_cast %arg2 : index to i64
+// CHECK: %[[S1:.*]] = builtin.unrealized_conversion_cast %arg1 : index to i64
+// CHECK: %[[S2:.*]] = builtin.unrealized_conversion_cast %arg0 : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S3:.*]] = arith.constant dense<1.100000e+01> : vector<f32>
+// CHECK: %[[S4:.*]] = builtin.unrealized_conversion_cast %[[S3]] : vector<f32> to vector<1xf32>
+// CHECK: %[[S5:.*]] = llvm.extractvalue %[[S2]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S6:.*]] = llvm.mlir.constant(100 : index) : i64
+// CHECK: %[[S7:.*]] = llvm.mul %[[S1]], %[[S6]] : i64
+// CHECK: %[[S8:.*]] = llvm.add %[[S7]], %[[S0]] : i64
+// CHECK: %[[S9:.*]] = llvm.getelementptr %[[S5]][%[[S8]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+// CHECK: llvm.store %[[S4]], %[[S9]] {alignment = 4 : i64} : vector<1xf32>, !llvm.ptr
 
 // -----