@@ -168,11 +168,12 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64>
 define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
 ; CHECK-LABEL: m2_splat_into_slide_two_source:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v13, v10, 1
-; CHECK-NEXT:    vslideup.vi v13, v11, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 12
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; CHECK-NEXT:    vrgather.vi v12, v8, 0
-; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    vslideup.vi v12, v10, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
   ret <4 x i64> %res
@@ -182,17 +183,18 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 252
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v11, (a0)
-; CHECK-NEXT:    vmv.v.i v0, 5
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vsrl.vi v10, v10, 1
-; CHECK-NEXT:    vadd.vi v10, v10, 1
-; CHECK-NEXT:    vrgather.vv v9, v11, v10, v0.t
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    li a0, 175
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v8, v8, 1
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    vrgather.vv v11, v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v10, 0, v0
 ; CHECK-NEXT:    addi a0, a1, 672
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -209,15 +211,15 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    li a0, -97
+; CHECK-NEXT:    vadd.vv v9, v9, v9
+; CHECK-NEXT:    vrsub.vi v9, v9, 4
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vrgather.vv v13, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vmv1r.v v12, v8
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vid.v v13
-; CHECK-NEXT:    vadd.vv v13, v13, v13
-; CHECK-NEXT:    vmv.v.i v0, 6
-; CHECK-NEXT:    vrsub.vi v13, v13, 4
-; CHECK-NEXT:    vrgather.vv v9, v12, v13, v0.t
+; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
 ; CHECK-NEXT:    ret
   %b = extractelement <4 x float> %a, i32 2
   %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
@@ -229,15 +231,16 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
 define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
 ; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vmv.v.i v16, 0
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vmv.v.i v0, 1
+; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vrgather.vi v18, v15, 1, v0.t
-; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
+; RV32-NEXT:    vrgather.vi v16, v8, 15, v0.t
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vslidedown.vx v8, v16, a0
 ; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    ret
@@ -255,14 +258,13 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
 ; RV64-NEXT:    addi s0, sp, 256
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    andi sp, sp, -128
-; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT:    vmv.v.i v0, 1
+; RV64-NEXT:    li a1, -17
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.i v16, 0
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    vrgather.vi v18, v15, 1, v0.t
+; RV64-NEXT:    vmv.s.x v0, a1
+; RV64-NEXT:    vrgather.vi v16, v8, 15
+; RV64-NEXT:    vmerge.vim v8, v16, 0, v0
 ; RV64-NEXT:    mv s2, sp
-; RV64-NEXT:    vs8r.v v16, (s2)
+; RV64-NEXT:    vs8r.v v8, (s2)
 ; RV64-NEXT:    andi a0, a0, 15
 ; RV64-NEXT:    li a1, 8
 ; RV64-NEXT:    call __muldi3
@@ -288,16 +290,21 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
 define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffles_add:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vmv1r.v v13, v10
-; CHECK-NEXT:    vslideup.vi v13, v11, 1
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    vmv.v.i v0, 1
-; CHECK-NEXT:    vrgather.vi v12, v9, 0
-; CHECK-NEXT:    vmv1r.v v9, v11
-; CHECK-NEXT:    vrgather.vi v9, v10, 1, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v8, v12, v8
+; CHECK-NEXT:    vrgather.vi v12, v8, 2
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v14
+; CHECK-NEXT:    vmv.v.i v0, 12
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgather.vi v16, v8, 3
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v14, v14
+; CHECK-NEXT:    vadd.vi v9, v8, -4
+; CHECK-NEXT:    vadd.vi v8, v8, -3
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v12, v10, v9, v0.t
+; CHECK-NEXT:    vrgatherei16.vv v16, v10, v8, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v12, v16
 ; CHECK-NEXT:    ret
   %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
   %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
0 commit comments