@@ -168,12 +168,11 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64>
168168define <4 x i64 > @m2_splat_into_slide_two_source (<4 x i64 > %v1 , <4 x i64 > %v2 ) vscale_range(2 ,2 ) {
169169; CHECK-LABEL: m2_splat_into_slide_two_source:
170170; CHECK: # %bb.0:
171- ; CHECK-NEXT: vsetivli zero, 1, e8, mf8 , ta, ma
172- ; CHECK-NEXT: vmv.v.i v0, 12
173- ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
171+ ; CHECK-NEXT: vsetivli zero, 2, e64, m1 , ta, ma
172+ ; CHECK-NEXT: vslidedown.vi v13, v10, 1
173+ ; CHECK-NEXT: vslideup.vi v13, v11, 1
174174; CHECK-NEXT: vrgather.vi v12, v8, 0
175- ; CHECK-NEXT: vslideup.vi v12, v10, 1, v0.t
176- ; CHECK-NEXT: vmv.v.v v8, v12
175+ ; CHECK-NEXT: vmv2r.v v8, v12
177176; CHECK-NEXT: ret
178177 %res = shufflevector <4 x i64 > %v1 , <4 x i64 > %v2 , <4 x i32 > <i32 0 , i32 0 , i32 5 , i32 6 >
179178 ret <4 x i64 > %res
@@ -183,18 +182,17 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
183182; CHECK-LABEL: shuffle1:
184183; CHECK: # %bb.0:
185184; CHECK-NEXT: addi a0, a0, 252
185+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
186+ ; CHECK-NEXT: vmv.v.i v8, 0
186187; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
187- ; CHECK-NEXT: vid.v v8
188+ ; CHECK-NEXT: vid.v v10
188189; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
189- ; CHECK-NEXT: vle32.v v9, (a0)
190- ; CHECK-NEXT: li a0, 175
191- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
192- ; CHECK-NEXT: vsrl.vi v8, v8, 1
193- ; CHECK-NEXT: vmv.s.x v0, a0
194- ; CHECK-NEXT: vadd.vi v8, v8, 1
195- ; CHECK-NEXT: vrgather.vv v11, v9, v8
196- ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
197- ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
190+ ; CHECK-NEXT: vle32.v v11, (a0)
191+ ; CHECK-NEXT: vmv.v.i v0, 5
192+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
193+ ; CHECK-NEXT: vsrl.vi v10, v10, 1
194+ ; CHECK-NEXT: vadd.vi v10, v10, 1
195+ ; CHECK-NEXT: vrgather.vv v9, v11, v10, v0.t
198196; CHECK-NEXT: addi a0, a1, 672
199197; CHECK-NEXT: vs2r.v v8, (a0)
200198; CHECK-NEXT: ret
@@ -211,15 +209,15 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
211209define <16 x float > @shuffle2 (<4 x float > %a ) vscale_range(2 ,2 ) {
212210; CHECK-LABEL: shuffle2:
213211; CHECK: # %bb.0:
214- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215- ; CHECK-NEXT: vid.v v9
216- ; CHECK-NEXT: li a0, -97
217- ; CHECK-NEXT: vadd.vv v9, v9, v9
218- ; CHECK-NEXT: vrsub.vi v9, v9, 4
219- ; CHECK-NEXT: vmv.s.x v0, a0
220- ; CHECK-NEXT: vrgather.vv v13, v8, v9
221212; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
222- ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
213+ ; CHECK-NEXT: vmv1r.v v12, v8
214+ ; CHECK-NEXT: vmv.v.i v8, 0
215+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
216+ ; CHECK-NEXT: vid.v v13
217+ ; CHECK-NEXT: vadd.vv v13, v13, v13
218+ ; CHECK-NEXT: vmv.v.i v0, 6
219+ ; CHECK-NEXT: vrsub.vi v13, v13, 4
220+ ; CHECK-NEXT: vrgather.vv v9, v12, v13, v0.t
223221; CHECK-NEXT: ret
224222 %b = extractelement <4 x float > %a , i32 2
225223 %c = insertelement <16 x float > <float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float undef , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 , float 0 .000000e+00 >, float %b , i32 5
@@ -231,16 +229,15 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
231229define i64 @extract_any_extend_vector_inreg_v16i64 (<16 x i64 > %a0 , i32 %a1 ) vscale_range(2 ,2 ) {
232230; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
233231; RV32: # %bb.0:
234- ; RV32-NEXT: li a1, 16
235- ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
232+ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
236233; RV32-NEXT: vmv.v.i v16, 0
237- ; RV32-NEXT: vmv.s.x v0, a1
234+ ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
235+ ; RV32-NEXT: vmv.v.i v0, 1
238236; RV32-NEXT: li a1, 32
239- ; RV32-NEXT: vrgather.vi v16, v8, 15 , v0.t
240- ; RV32-NEXT: vsetvli zero, zero , e64, m8, ta, ma
237+ ; RV32-NEXT: vrgather.vi v18, v15, 1 , v0.t
238+ ; RV32-NEXT: vsetivli zero, 1 , e64, m8, ta, ma
241239; RV32-NEXT: vslidedown.vx v8, v16, a0
242240; RV32-NEXT: vmv.x.s a0, v8
243- ; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
244241; RV32-NEXT: vsrl.vx v8, v8, a1
245242; RV32-NEXT: vmv.x.s a1, v8
246243; RV32-NEXT: ret
@@ -258,13 +255,14 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
258255; RV64-NEXT: addi s0, sp, 256
259256; RV64-NEXT: .cfi_def_cfa s0, 0
260257; RV64-NEXT: andi sp, sp, -128
261- ; RV64-NEXT: li a1, -17
258+ ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
259+ ; RV64-NEXT: vmv.v.i v0, 1
262260; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
263- ; RV64-NEXT: vmv.s.x v0, a1
264- ; RV64-NEXT: vrgather.vi v16, v8, 15
265- ; RV64-NEXT: vmerge.vim v8, v16, 0 , v0
261+ ; RV64-NEXT: vmv.v.i v16, 0
262+ ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
263+ ; RV64-NEXT: vrgather.vi v18, v15, 1 , v0.t
266264; RV64-NEXT: mv s2, sp
267- ; RV64-NEXT: vs8r.v v8 , (s2)
265+ ; RV64-NEXT: vs8r.v v16 , (s2)
268266; RV64-NEXT: andi a0, a0, 15
269267; RV64-NEXT: li a1, 8
270268; RV64-NEXT: call __muldi3
@@ -290,21 +288,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
290288define <4 x double > @shuffles_add (<4 x double > %0 , <4 x double > %1 ) vscale_range(2 ,2 ) {
291289; CHECK-LABEL: shuffles_add:
292290; CHECK: # %bb.0:
291+ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
292+ ; CHECK-NEXT: vmv1r.v v13, v10
293+ ; CHECK-NEXT: vslideup.vi v13, v11, 1
294+ ; CHECK-NEXT: vmv1r.v v8, v9
295+ ; CHECK-NEXT: vmv.v.i v0, 1
296+ ; CHECK-NEXT: vrgather.vi v12, v9, 0
297+ ; CHECK-NEXT: vmv1r.v v9, v11
298+ ; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
293299; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
294- ; CHECK-NEXT: vrgather.vi v12, v8, 2
295- ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
296- ; CHECK-NEXT: vid.v v14
297- ; CHECK-NEXT: vmv.v.i v0, 12
298- ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
299- ; CHECK-NEXT: vrgather.vi v16, v8, 3
300- ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
301- ; CHECK-NEXT: vadd.vv v8, v14, v14
302- ; CHECK-NEXT: vadd.vi v9, v8, -4
303- ; CHECK-NEXT: vadd.vi v8, v8, -3
304- ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
305- ; CHECK-NEXT: vrgatherei16.vv v12, v10, v9, v0.t
306- ; CHECK-NEXT: vrgatherei16.vv v16, v10, v8, v0.t
307- ; CHECK-NEXT: vfadd.vv v8, v12, v16
300+ ; CHECK-NEXT: vfadd.vv v8, v12, v8
308301; CHECK-NEXT: ret
309302 %3 = shufflevector <4 x double > %0 , <4 x double > %1 , <4 x i32 > <i32 undef , i32 2 , i32 4 , i32 6 >
310303 %4 = shufflevector <4 x double > %0 , <4 x double > %1 , <4 x i32 > <i32 undef , i32 3 , i32 5 , i32 7 >
@@ -332,12 +325,13 @@ entry:
332325define <16 x i32 > @m4_linear_num_of_shuffles_in_chunks (<16 x i32 > %0 ) vscale_range(2 ,2 ) {
333326; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks:
334327; CHECK: # %bb.0: # %entry
335- ; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
336- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0)
337- ; CHECK-NEXT: vl2re16.v v16, (a0)
338- ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
339- ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
340- ; CHECK-NEXT: vmv.v.v v8, v12
328+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
329+ ; CHECK-NEXT: vmv.v.i v0, 8
330+ ; CHECK-NEXT: vrgather.vi v12, v10, 0
331+ ; CHECK-NEXT: vrgather.vi v12, v11, 0, v0.t
332+ ; CHECK-NEXT: vrgather.vi v14, v8, 2
333+ ; CHECK-NEXT: vrgather.vi v15, v10, 3
334+ ; CHECK-NEXT: vmv4r.v v8, v12
341335; CHECK-NEXT: ret
342336entry:
343337 %1 = shufflevector <16 x i32 > %0 , <16 x i32 > poison, <16 x i32 > <i32 poison, i32 poison, i32 8 , i32 12 , i32 poison, i32 poison, i32 poison, i32 poison, i32 2 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11 , i32 poison>
0 commit comments