@@ -42,7 +42,7 @@ func.func @vector_transfer_read_i2() -> vector<3xi2> {
4242
4343// -----
4444
45- func.func @vector_cst_maskedload_i2 (%passthru: vector <5 xi2 >) -> vector <3 x5 xi2 > {
45+ func.func @vector_constant_mask_maskedload_i2 (%passthru: vector <5 xi2 >) -> vector <3 x5 xi2 > {
4646 %0 = memref.alloc () : memref <3 x5 xi2 >
4747 %cst = arith.constant dense <0 > : vector <3 x5 xi2 >
4848 %mask = vector.constant_mask [3 ] : vector <5 xi1 >
@@ -54,7 +54,7 @@ func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> {
5454 return %2 : vector <3 x5 xi2 >
5555}
5656
57- // CHECK-LABEL: func @vector_cst_maskedload_i2 (
57+ // CHECK-LABEL: func @vector_constant_mask_maskedload_i2 (
5858// CHECK-SAME: %[[ARG0:.+]]: vector<5xi2>) -> vector<3x5xi2>
5959// CHECK: %[[ORIGINMASK:.+]] = vector.constant_mask [3] : vector<5xi1>
6060// CHECK: %[[NEWMASK:.+]] = arith.constant dense<true> : vector<2xi1>
@@ -74,6 +74,55 @@ func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> {
7474
7575// -----
7676
77+ // This tests the correctness of generating a compressed mask with `vector.create_mask` and a dynamic input.
78+ // Specifically, the program performs a masked load of a vector<5xi2> from `memref<3x5xi2>[1, 0]`, with an unknown mask size `m`.
79+ // After the emulation transformation, it performs a masked load of 2 bytes from linearized index `memref<4xi8>[1]`, with a new
80+ // compressed mask given by `ceildiv(m + 1, 4)` (the `+ 1` is the intra-byte offset of element [1, 0], i.e. 5 mod 4).
81+ func.func @unaligned_create_mask_dynamic_i2 (%m : index , %passthru: vector <5 xi2 >) -> vector <5 xi2 > {
82+ %0 = memref.alloc () : memref <3 x5 xi2 >
83+ %c0 = arith.constant 0 : index
84+ %c1 = arith.constant 1 : index
85+ %mask = vector.create_mask %m : vector <5 xi1 >
86+ %1 = vector.maskedload %0 [%c1 , %c0 ], %mask , %passthru :
87+ memref <3 x5 xi2 >, vector <5 xi1 >, vector <5 xi2 > into vector <5 xi2 >
88+ return %1 : vector <5 xi2 >
89+ }
90+
91+ // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> ((s0 + 1) ceildiv 4)>
92+ // CHECK: func @unaligned_create_mask_dynamic_i2(
93+ // CHECK-SAME: %[[NUM_ELEMS_TO_LOAD:.+]]: index, %[[PASSTHRU:.+]]: vector<5xi2>)
94+ // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<4xi8>
95+ // CHECK: %[[COMPRESSED_MASK:.+]] = affine.apply #[[MAP]]()[%[[NUM_ELEMS_TO_LOAD]]]
96+ // CHECK: vector.create_mask %[[COMPRESSED_MASK]] : vector<2xi1>
97+ // CHECK: %[[C1:.+]] = arith.constant 1 : index
98+ // CHECK: vector.maskedload %[[ALLOC]][%[[C1]]]
99+
100+ // -----
101+
102+ // This tests the correctness of generating a compressed mask with `vector.create_mask` and a static input.
103+ // It mirrors the previous test, but the mask size is a static value.
104+ // In this case, the desired slice `vector<7xi2>` spans 3 bytes.
105+ func.func @check_unaligned_create_mask_static_i2 (%passthru: vector <7 xi2 >) -> vector <7 xi2 > {
106+ %0 = memref.alloc () : memref <3 x7 xi2 >
107+ %c0 = arith.constant 0 : index
108+ %c1 = arith.constant 1 : index
109+ %c3 = arith.constant 3 : index
110+ %mask = vector.create_mask %c3 : vector <7 xi1 >
111+ %1 = vector.maskedload %0 [%c1 , %c0 ], %mask , %passthru :
112+ memref <3 x7 xi2 >, vector <7 xi1 >, vector <7 xi2 > into vector <7 xi2 >
113+ return %1 : vector <7 xi2 >
114+ }
115+
116+ // CHECK: func @check_unaligned_create_mask_static_i2(
117+ // CHECK-SAME: %[[PASSTHRU:[a-zA-Z0-9]+]]: vector<7xi2>)
118+ // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<6xi8>
119+ // CHECK: %[[C2:.+]] = arith.constant 2 : index
120+ // CHECK: %[[COMP_MASK:.+]] = vector.create_mask %[[C2]] : vector<3xi1>
121+ // CHECK: %[[C1:.+]] = arith.constant 1 : index
122+ // CHECK: vector.maskedload %[[ALLOC]][%[[C1]]], %[[COMP_MASK]]
123+
124+ // -----
125+
77126func.func @vector_load_i2_dynamic_indexing (%idx1: index , %idx2: index ) -> vector <3 xi2 > {
78127 %0 = memref.alloc () : memref <3 x3 xi2 >
79128 %cst = arith.constant dense <0 > : vector <3 x3 xi2 >
0 commit comments