mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir (223 changes: 191 additions & 32 deletions)
@@ -2377,7 +2377,7 @@ func.func @matrix_ops_index(%A: vector<64xindex>, %B: vector<48xindex>) -> vecto

// -----

func.func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
func.func @transfer_read_write_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
Contributor

This file has become too big. Would you consider separating out 'vector-transfer-to-llvm.mlir', like the existing vector-mask-to-llvm?

Contributor Author

Great suggestion, see the latest commit :)
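For illustration only (this is not the contents of that commit): a minimal sketch of how a split-out vector-transfer-to-llvm.mlir could begin, assuming it reuses this file's RUN invocation. The exact flags are an assumption and would be copied from this file's header; the transfer tests below would then move over unchanged.

// RUN: mlir-opt %s -convert-vector-to-llvm -split-input-file | FileCheck %s

// CHECK-LABEL: func @transfer_read_write_1d
func.func @transfer_read_write_1d(%A : memref<?xf32>, %base : index) -> vector<17xf32> {
  %f7 = arith.constant 7.0 : f32
  %f = vector.transfer_read %A[%base], %f7
      {permutation_map = affine_map<(d0) -> (d0)>} :
    memref<?xf32>, vector<17xf32>
  vector.transfer_write %f, %A[%base]
      {permutation_map = affine_map<(d0) -> (d0)>} :
    vector<17xf32>, memref<?xf32>
  return %f : vector<17xf32>
}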

%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7
{permutation_map = affine_map<(d0) -> (d0)>} :
@@ -2387,7 +2387,7 @@ func.func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32>
vector<17xf32>, memref<?xf32>
return %f: vector<17xf32>
}
// CHECK-LABEL: func @transfer_read_1d
// CHECK-LABEL: func @transfer_read_write_1d
// CHECK-SAME: %[[MEM:.*]]: memref<?xf32>,
// CHECK-SAME: %[[BASE:.*]]: index) -> vector<17xf32>
// CHECK: %[[C7:.*]] = arith.constant 7.0
@@ -2449,9 +2449,77 @@ func.func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32>
// CHECK-SAME: {alignment = 4 : i32} :
// CHECK-SAME: vector<17xf32>, vector<17xi1> into !llvm.ptr

func.func @transfer_read_write_1d_scalable(%A : memref<?xf32>, %base: index) -> vector<[17]xf32> {
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7
{permutation_map = affine_map<(d0) -> (d0)>} :
memref<?xf32>, vector<[17]xf32>
vector.transfer_write %f, %A[%base]
{permutation_map = affine_map<(d0) -> (d0)>} :
vector<[17]xf32>, memref<?xf32>
return %f: vector<[17]xf32>
}
// CHECK-LABEL: func @transfer_read_write_1d_scalable
// CHECK-SAME: %[[MEM:.*]]: memref<?xf32>,
// CHECK-SAME: %[[BASE:.*]]: index) -> vector<[17]xf32>
// CHECK: %[[C7:.*]] = arith.constant 7.0
//
// 1. Let dim be the memref dimension, compute the in-bound index (dim - offset)
Contributor

I like this commenting style of explaining sections of CHECKS.

// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[DIM:.*]] = memref.dim %[[MEM]], %[[C0]] : memref<?xf32>
Contributor

nit: committing to tab space before ' CHECK' reduces readability for long lines unnecessarily. Maybe reduce to ' CHECK'.

Contributor Author

This is actually just replicating the format of the previous test. So, if I were to update one, I'd need to update the other. But I really want to avoid the churn (there's been a lot of that already 😅).

// CHECK: %[[BOUND:.*]] = arith.subi %[[DIM]], %[[BASE]] : index
//
// 2. Create a vector with linear indices [ 0 .. vector_length - 1 ].
// CHECK: %[[linearIndex:.*]] = llvm.intr.stepvector : vector<[17]xi32>
//
// 3. Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
// CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] : index to i32
// CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi slt, %[[linearIndex]], %[[boundVect]]
// CHECK-SAME: : vector<[17]xi32>
//
// 4. Create pass-through vector.
// CHECK: %[[PASS_THROUGH:.*]] = arith.constant dense<7.{{.*}}> : vector<[17]xf32>
//
// 5. Bitcast to vector form.
// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}} :
// CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr, f32
//
// 6. Rewrite as a masked read.
// CHECK: %[[loaded:.*]] = llvm.intr.masked.load %[[gep]], %[[mask]],
// CHECK-SAME: %[[PASS_THROUGH]] {alignment = 4 : i32} :
// CHECK-SAME: -> vector<[17]xf32>
//
// 1. Let dim be the memref dimension, compute the in-bound index (dim - offset)
// CHECK: %[[C0_b:.*]] = arith.constant 0 : index
// CHECK: %[[DIM_b:.*]] = memref.dim %[[MEM]], %[[C0_b]] : memref<?xf32>
// CHECK: %[[BOUND_b:.*]] = arith.subi %[[DIM_b]], %[[BASE]] : index
//
// 2. Create a vector with linear indices [ 0 .. vector_length - 1 ].
// CHECK: %[[linearIndex_b:.*]] = llvm.intr.stepvector : vector<[17]xi32>
//
// 3. Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
// CHECK: %[[btrunc_b:.*]] = arith.index_cast %[[BOUND_b]] : index to i32
// CHECK: %[[boundVecInsert_b:.*]] = llvm.insertelement %[[btrunc_b]]
// CHECK: %[[boundVect_b:.*]] = llvm.shufflevector %[[boundVecInsert_b]]
// CHECK: %[[mask_b:.*]] = arith.cmpi slt, %[[linearIndex_b]],
// CHECK-SAME: %[[boundVect_b]] : vector<[17]xi32>
//
// 4. Bitcast to vector form.
// CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} :
// CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr, f32
//
// 5. Rewrite as a masked write.
// CHECK: llvm.intr.masked.store %[[loaded]], %[[gep_b]], %[[mask_b]]
// CHECK-SAME: {alignment = 4 : i32} :
// CHECK-SAME: vector<[17]xf32>, vector<[17]xi1> into !llvm.ptr
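Steps 1-3 above amount to building an in-bounds mask from (dim - offset), which is conceptually the mask that vector.create_mask would produce. A vector-dialect sketch of that equivalence; it is not part of the test, and the function name is illustrative:

// Sketch only: the mask materialized by the lowering (stepvector < splat(bound))
// is the same mask create_mask(dim - offset) yields for a scalable vector.
func.func @in_bounds_mask_sketch(%A : memref<?xf32>, %base : index) -> vector<[17]xf32> {
  %c0 = arith.constant 0 : index
  %f7 = arith.constant 7.0 : f32
  %dim = memref.dim %A, %c0 : memref<?xf32>            // dim
  %bound = arith.subi %dim, %base : index              // dim - offset
  %mask = vector.create_mask %bound : vector<[17]xi1>  // lanes [0, bound) are active
  %f = vector.transfer_read %A[%base], %f7, %mask {in_bounds = [true]}
      : memref<?xf32>, vector<[17]xf32>
  return %f : vector<[17]xf32>
}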

// -----

func.func @transfer_read_index_1d(%A : memref<?xindex>, %base: index) -> vector<17xindex> {
func.func @transfer_read_write_index_1d(%A : memref<?xindex>, %base: index) -> vector<17xindex> {
%f7 = arith.constant 7: index
%f = vector.transfer_read %A[%base], %f7
{permutation_map = affine_map<(d0) -> (d0)>} :
@@ -2461,7 +2529,7 @@ func.func @transfer_read_index_1d(%A : memref<?xindex>, %base: index) -> vector<
vector<17xindex>, memref<?xindex>
return %f: vector<17xindex>
}
// CHECK-LABEL: func @transfer_read_index_1d
// CHECK-LABEL: func @transfer_read_write_index_1d
// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xindex>
// CHECK: %[[SPLAT:.*]] = arith.constant dense<7> : vector<17xindex>
// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[SPLAT]] : vector<17xindex> to vector<17xi64>
@@ -2472,6 +2540,27 @@ func.func @transfer_read_index_1d(%A : memref<?xindex>, %base: index) -> vector<
// CHECK: llvm.intr.masked.store %[[loaded]], %{{.*}}, %{{.*}} {alignment = 8 : i32} :
// CHECK-SAME: vector<17xi64>, vector<17xi1> into !llvm.ptr

func.func @transfer_read_write_index_1d_scalable(%A : memref<?xindex>, %base: index) -> vector<[17]xindex> {
%f7 = arith.constant 7: index
%f = vector.transfer_read %A[%base], %f7
{permutation_map = affine_map<(d0) -> (d0)>} :
memref<?xindex>, vector<[17]xindex>
vector.transfer_write %f, %A[%base]
{permutation_map = affine_map<(d0) -> (d0)>} :
vector<[17]xindex>, memref<?xindex>
return %f: vector<[17]xindex>
}
// CHECK-LABEL: func @transfer_read_write_index_1d_scalable
// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<[17]xindex>
// CHECK: %[[SPLAT:.*]] = arith.constant dense<7> : vector<[17]xindex>
// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[SPLAT]] : vector<[17]xindex> to vector<[17]xi64>

// CHECK: %[[loaded:.*]] = llvm.intr.masked.load %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} :
// CHECK-SAME: (!llvm.ptr, vector<[17]xi1>, vector<[17]xi64>) -> vector<[17]xi64>

// CHECK: llvm.intr.masked.store %[[loaded]], %{{.*}}, %{{.*}} {alignment = 8 : i32} :
// CHECK-SAME: vector<[17]xi64>, vector<[17]xi1> into !llvm.ptr

// -----

func.func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: index) -> vector<17xf32> {
@@ -2501,9 +2590,34 @@ func.func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: i
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi slt, %[[linearIndex]], %[[boundVect]]

func.func @transfer_read_2d_to_1d_scalable(%A : memref<?x?xf32>, %base0: index, %base1: index) -> vector<[17]xf32> {
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base0, %base1], %f7
{permutation_map = affine_map<(d0, d1) -> (d1)>} :
memref<?x?xf32>, vector<[17]xf32>
return %f: vector<[17]xf32>
}
// CHECK-LABEL: func @transfer_read_2d_to_1d_scalable
// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: index, %[[BASE_1:[a-zA-Z0-9]*]]: index) -> vector<[17]xf32>
// CHECK: %[[c1:.*]] = arith.constant 1 : index
// CHECK: %[[DIM:.*]] = memref.dim %{{.*}}, %[[c1]] : memref<?x?xf32>
//
// Compute the in-bound index (dim - offset)
// CHECK: %[[BOUND:.*]] = arith.subi %[[DIM]], %[[BASE_1]] : index
//
// Create a vector with linear indices [ 0 .. vector_length - 1 ].
// CHECK: %[[linearIndex:.*]] = llvm.intr.stepvector : vector<[17]xi32>
//
// Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
// CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] : index to i32
// CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi slt, %[[linearIndex]], %[[boundVect]]

// -----

func.func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) -> vector<17xf32> {
func.func @transfer_read_write_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) -> vector<17xf32> {
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7
{permutation_map = affine_map<(d0) -> (d0)>} :
@@ -2513,7 +2627,7 @@ func.func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: ind
vector<17xf32>, memref<?xf32, 3>
return %f: vector<17xf32>
}
// CHECK-LABEL: func @transfer_read_1d_non_zero_addrspace
// CHECK-LABEL: func @transfer_read_write_1d_non_zero_addrspace
// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32>
//
// 1. Check address space for GEP is correct.
@@ -2528,6 +2642,31 @@ func.func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: ind
// CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} :
// CHECK-SAME: (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32

func.func @transfer_read_write_1d_non_zero_addrspace_scalable(%A : memref<?xf32, 3>, %base: index) -> vector<[17]xf32> {
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7
{permutation_map = affine_map<(d0) -> (d0)>} :
memref<?xf32, 3>, vector<[17]xf32>
vector.transfer_write %f, %A[%base]
{permutation_map = affine_map<(d0) -> (d0)>} :
vector<[17]xf32>, memref<?xf32, 3>
return %f: vector<[17]xf32>
}
// CHECK-LABEL: func @transfer_read_write_1d_non_zero_addrspace_scalable
// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<[17]xf32>
//
// 1. Check address space for GEP is correct.
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
// CHECK-SAME: (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32
//
// 2. Check address space of the memref is correct.
// CHECK: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[DIM:.*]] = memref.dim %{{.*}}, %[[c0]] : memref<?xf32, 3>
//
// 3. Check address space for GEP is correct.
// CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} :
// CHECK-SAME: (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f32

// -----

func.func @transfer_read_1d_inbounds(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
@@ -2546,51 +2685,71 @@ func.func @transfer_read_1d_inbounds(%A : memref<?xf32>, %base: index) -> vector
// 2. Rewrite as a load.
// CHECK: %[[loaded:.*]] = llvm.load %[[gep]] {alignment = 4 : i64} : !llvm.ptr -> vector<17xf32>

func.func @transfer_read_1d_inbounds_scalable(%A : memref<?xf32>, %base: index) -> vector<[17]xf32> {
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7 {in_bounds = [true]} :
memref<?xf32>, vector<[17]xf32>
return %f: vector<[17]xf32>
}
// CHECK-LABEL: func @transfer_read_1d_inbounds_scalable
// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<[17]xf32>
//
// 1. Bitcast to vector form.
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
// CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr, f32
//
// 2. Rewrite as a load.
// CHECK: %[[loaded:.*]] = llvm.load %[[gep]] {alignment = 4 : i64} : !llvm.ptr -> vector<[17]xf32>
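With in_bounds = [true] the bound and mask computation is skipped entirely and the transfer lowers to a plain vector load, as the two CHECKs above show. A self-contained sketch of that lowered shape; the function name and operands are made up for illustration:

// Sketch only: approximate post-conversion IR for the in-bounds scalable read.
llvm.func @inbounds_read_sketch(%buf : !llvm.ptr, %off : i64) -> vector<[17]xf32> {
  // Address of the first element of the transfer.
  %gep = llvm.getelementptr %buf[%off] : (!llvm.ptr, i64) -> !llvm.ptr, f32
  // No mask is needed: the whole transfer is one aligned vector load.
  %v = llvm.load %gep {alignment = 4 : i64} : !llvm.ptr -> vector<[17]xf32>
  llvm.return %v : vector<[17]xf32>
}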

// -----

// CHECK-LABEL: func @transfer_read_1d_mask
// CHECK-LABEL: func @transfer_read_write_1d_mask
// CHECK: %[[mask1:.*]] = arith.constant dense<[false, false, true, false, true]>
// CHECK: %[[cmpi:.*]] = arith.cmpi slt
// CHECK: %[[mask2:.*]] = arith.andi %[[cmpi]], %[[mask1]]
// CHECK: %[[r:.*]] = llvm.intr.masked.load %{{.*}}, %[[mask2]]
// CHECK: %[[cmpi_1:.*]] = arith.cmpi slt
// CHECK: %[[mask3:.*]] = arith.andi %[[cmpi_1]], %[[mask1]]
// CHECK: llvm.intr.masked.store %[[r]], %{{.*}}, %[[mask3]]
// CHECK: return %[[r]]
func.func @transfer_read_1d_mask(%A : memref<?xf32>, %base : index) -> vector<5xf32> {
func.func @transfer_read_write_1d_mask(%A : memref<?xf32>, %base : index) -> vector<5xf32> {
%m = arith.constant dense<[0, 0, 1, 0, 1]> : vector<5xi1>
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7, %m : memref<?xf32>, vector<5xf32>
vector.transfer_write %f, %A[%base], %m : vector<5xf32>, memref<?xf32>
return %f: vector<5xf32>
}

// -----

// CHECK-LABEL: func @transfer_read_1d_scalable_mask
// CHECK: %[[passtru:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
// CHECK: %[[r:.*]] = llvm.intr.masked.load %{{.*}}, %{{.*}}, %[[passtru]] {alignment = 4 : i32} : (!llvm.ptr, vector<[4]xi1>, vector<[4]xf32>) -> vector<[4]xf32>
// CHECK: return %[[r]] : vector<[4]xf32>
func.func @transfer_read_1d_scalable_mask(%arg0: memref<1x?xf32>, %mask: vector<[4]xi1>) -> vector<[4]xf32> {
%c0 = arith.constant 0 : index
%pad = arith.constant 0.0 : f32
%vec = vector.transfer_read %arg0[%c0, %c0], %pad, %mask {in_bounds = [true]} : memref<1x?xf32>, vector<[4]xf32>
return %vec : vector<[4]xf32>
// CHECK-LABEL: func @transfer_read_write_1d_mask_scalable
// CHECK-SAME: %[[mask:[a-zA-Z0-9]*]]: vector<[5]xi1>
// CHECK: %[[cmpi:.*]] = arith.cmpi slt
// CHECK: %[[mask1:.*]] = arith.andi %[[cmpi]], %[[mask]]
// CHECK: %[[r:.*]] = llvm.intr.masked.load %{{.*}}, %[[mask1]]
// CHECK: %[[cmpi_1:.*]] = arith.cmpi slt
// CHECK: %[[mask2:.*]] = arith.andi %[[cmpi_1]], %[[mask]]
// CHECK: llvm.intr.masked.store %[[r]], %{{.*}}, %[[mask2]]
// CHECK: return %[[r]]
func.func @transfer_read_write_1d_mask_scalable(%A : memref<?xf32>, %base : index, %m : vector<[5]xi1>) -> vector<[5]xf32> {
%f7 = arith.constant 7.0: f32
%f = vector.transfer_read %A[%base], %f7, %m : memref<?xf32>, vector<[5]xf32>
vector.transfer_write %f, %A[%base], %m : vector<[5]xf32>, memref<?xf32>
return %f: vector<[5]xf32>
}

// -----
// CHECK-LABEL: func @transfer_write_1d_scalable_mask
// CHECK: llvm.intr.masked.store %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : vector<[4]xf32>, vector<[4]xi1> into !llvm.ptr
func.func @transfer_write_1d_scalable_mask(%arg0: memref<1x?xf32>, %vec: vector<[4]xf32>, %mask: vector<[4]xi1>) {
%c0 = arith.constant 0 : index
vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = [true]} : vector<[4]xf32>, memref<1x?xf32>
return
}

// -----
// Can't lower xfer_read/xfer_write on tensors, but this shouldn't crash

// CHECK-LABEL: func @transfer_write_tensor
// CHECK-LABEL: func @transfer_read_write_tensor
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
func.func @transfer_write_tensor(%arg0: vector<4xf32>,%arg1: tensor<?xf32>) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
%0 = vector.transfer_write %arg0, %arg1[%c0] : vector<4xf32>, tensor<?xf32>
return %0 : tensor<?xf32>
func.func @transfer_read_write_tensor(%A: tensor<?xf32>, %base : index) -> vector<4xf32> {
%f7 = arith.constant 7.0: f32
%c0 = arith.constant 0: index
%f = vector.transfer_read %A[%base], %f7 : tensor<?xf32>, vector<4xf32>
%w = vector.transfer_write %f, %A[%c0] : vector<4xf32>, tensor<?xf32>
"test.some_use"(%w) : (tensor<?xf32>) -> ()
return %f : vector<4xf32>
}

// -----