@@ -42,22 +42,20 @@ func.func @vector_transfer_read_i2() -> vector<3xi2> {
4242
4343// -----
4444
45- func.func @vector_constant_mask_maskedload_i2 (%passthru: vector <5 xi2 >) -> vector <3 x 5 x i2 > {
45+ func.func @vector_constant_mask_maskedload_i2 (%passthru: vector <5 xi2 >) -> vector <5 x i2 > {
4646 %0 = memref.alloc () : memref <3 x5 xi2 >
47- %cst = arith.constant dense <0 > : vector <3 x5 xi2 >
4847 %mask = vector.constant_mask [3 ] : vector <5 xi1 >
4948 %c0 = arith.constant 0 : index
5049 %c2 = arith.constant 2 : index
5150 %1 = vector.maskedload %0 [%c2 , %c0 ], %mask , %passthru :
5251 memref <3 x5 xi2 >, vector <5 xi1 >, vector <5 xi2 > into vector <5 xi2 >
53- %2 = vector.insert %1 , %cst [0 ] : vector <5 xi2 > into vector <3 x5 xi2 >
54- return %2 : vector <3 x5 xi2 >
52+ return %1 : vector <5 xi2 >
5553}
56-
5754// CHECK-LABEL: func @vector_constant_mask_maskedload_i2(
58- // CHECK-SAME: %[[ARG0:.+]]: vector<5xi2>) -> vector<3x5xi2>
55+ // CHECK-SAME: %[[ARG0:.+]]: vector<5xi2>) -> vector<5xi2>
56+ // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<4xi8>
5957// CHECK: %[[ORIGINMASK:.+]] = vector.constant_mask [3] : vector<5xi1>
60- // CHECK: %[[NEWMASK:.+]] = arith.constant dense<true> : vector<2xi1>
58+ // CHECK: %[[NEWMASK:.+]] = vector.constant_mask [2] : vector<2xi1>
6159// CHECK: %[[VESSEL:.+]] = arith.constant dense<0> : vector<8xi2>
6260// CHECK: %[[INSERT1:.+]] = vector.insert_strided_slice %[[ARG0]], %[[VESSEL]]
6361// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi2> into vector<8xi2>
@@ -123,6 +121,29 @@ func.func @check_unaligned_create_mask_static_i2(%passthru: vector<7xi2>) -> vec
123121
124122// -----
125123
124+ // This test is similar to @vector_constant_mask_maskedload_i2, but the mask is multi-dimensional.
125+ func.func @vector_constant_mask_maskedload_i2_multidim (%passthru: vector <5 xi2 >) -> vector <5 xi2 > {
126+ %0 = memref.alloc () : memref <4 x3 x5 xi2 >
127+ %mask = vector.constant_mask [2 , 2 ] : vector <3 x5 xi1 >
128+ %ext_mask = vector.extract %mask [1 ] : vector <5 xi1 > from vector <3 x5 xi1 >
129+ %c0 = arith.constant 0 : index
130+ %c2 = arith.constant 2 : index
131+ %1 = vector.maskedload %0 [%c2 , %c0 , %c0 ], %ext_mask , %passthru :
132+ memref <4 x3 x5 xi2 >, vector <5 xi1 >, vector <5 xi2 > into vector <5 xi2 >
133+ return %1 : vector <5 xi2 >
134+ }
135+
136+ // CHECK-LABEL: func @vector_constant_mask_maskedload_i2_multidim(
137+ // CHECK: %[[ORIG_MASK:.+]] = vector.constant_mask [2, 2] : vector<3x5xi1>
138+ // CHECK: vector.extract %[[ORIG_MASK]][1]
139+
140+ // Compress the mask used for the emulated masked load:
141+ // the innermost dimension shrinks from 5 elements to 2.
142+ // CHECK: %[[NEW_COMPRESSED_MASK:.+]] = vector.constant_mask [2, 1] : vector<3x2xi1>
143+ // CHECK: vector.extract %[[NEW_COMPRESSED_MASK]][1]
144+
145+ // -----
146+
126147func.func @vector_load_i2_dynamic_indexing (%idx1: index , %idx2: index ) -> vector <3 xi2 > {
127148 %0 = memref.alloc () : memref <3 x3 xi2 >
128149 %cst = arith.constant dense <0 > : vector <3 x3 xi2 >
@@ -252,7 +273,7 @@ func.func @vector_maskedload_i2_dynamic_indexing_mixed(%passthru: vector<3xi2>,
252273// CHECK: %[[MASK:.+]] = vector.constant_mask [3] : vector<3xi1>
253274// CHECK: %[[LINEAR1:.+]] = affine.apply #map()[%[[IDX]]]
254275// CHECK: %[[LINEAR2:.+]] = affine.apply #map1()[%[[IDX]]]
255- // CHECK: %[[ONE:.+]] = arith.constant dense<true> : vector<2xi1>
276+ // CHECK: %[[ONE:.+]] = vector.constant_mask [2] : vector<2xi1>
256277// CHECK: %[[ZERO:.+]] = arith.constant dense<0> : vector<8xi2>
257278
258279// Extract passthru vector, and insert into zero vector, this is for constructing a new passthru
@@ -301,7 +322,7 @@ func.func @vector_maskedload_i2_dynamic_indexing_mixed(%passthru: vector<3xi2>,
301322
302323// -----
303324
304- func.func @vector_maskedload_i4_constant_mask_unaligned (%passthru: vector <5 xi2 >) -> vector <5 xi2 > {
325+ func.func @vector_maskedload_i2_constant_mask_unaligned (%passthru: vector <5 xi2 >) -> vector <5 xi2 > {
305326 %0 = memref.alloc () : memref <3 x5 xi2 >
306327 %mask = arith.constant dense <[false , true , true , true , false ]> : vector <5 xi1 >
307328 %c0 = arith.constant 0 : index
@@ -311,24 +332,23 @@ func.func @vector_maskedload_i4_constant_mask_unaligned(%passthru: vector<5xi2>)
311332 return %1 : vector <5 xi2 >
312333}
313334
314- // CHECK: func @vector_maskedload_i4_constant_mask_unaligned (
335+ // CHECK: func @vector_maskedload_i2_constant_mask_unaligned (
315336// CHECK-SAME: %[[PTH:.+]]: vector<5xi2>) -> vector<5xi2>
316337// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<4xi8>
317338// CHECK: %[[MASK:.+]] = arith.constant dense<[false, true, true, true, false]> : vector<5xi1>
318339
340+ // Emulated masked load from alloc:
319341// CHECK: %[[COMPRESSED_MASK:.+]] = arith.constant dense<true> : vector<2xi1>
320342// CHECK: %[[EMPTY:.+]] = arith.constant dense<0> : vector<8xi2>
321343// CHECK: %[[PTH_PADDED:.+]] = vector.insert_strided_slice %[[PTH]], %[[EMPTY]]
322344// CHECK-SAME: {offsets = [1], strides = [1]} : vector<5xi2> into vector<8xi2>
323-
324- // Emulated masked load from alloc:
325345// CHECK: %[[PTH_PADDED_UPCAST:.+]] = vector.bitcast %[[PTH_PADDED]] : vector<8xi2> to vector<2xi8>
326346// CHECK: %[[C1:.+]] = arith.constant 1 : index
327347// CHECK: %[[MASKLOAD:.+]] = vector.maskedload %[[ALLOC]][%[[C1]]], %[[COMPRESSED_MASK]], %[[PTH_PADDED_UPCAST]]
328348// CHECK: %[[MASKLOAD_DOWNCAST:.+]] = vector.bitcast %[[MASKLOAD]] : vector<2xi8> to vector<8xi2>
329349
330350// Select from emulated loaded vector and passthru vector:
331- // TODO: fold this part if possible.
351+ // TODO: fold insert_strided_slice into source if possible.
332352// CHECK: %[[EMPTY_MASK:.+]] = arith.constant dense<false> : vector<8xi1>
333353// CHECK: %[[MASK_PADDED:.+]] = vector.insert_strided_slice %[[MASK]], %[[EMPTY_MASK]]
334354// CHECK-SAME: {offsets = [1], strides = [1]} : vector<5xi1> into vector<8xi1>
0 commit comments