@@ -69,14 +69,9 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
6969define <4 x double > @vrgather_permute_shuffle_vu_v4f64 (<4 x double > %x ) {
7070; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
7171; CHECK: # %bb.0:
72- ; CHECK-NEXT: lui a0, 4096
73- ; CHECK-NEXT: addi a0, a0, 513
74- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
75- ; CHECK-NEXT: vmv.s.x v10, a0
76- ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
77- ; CHECK-NEXT: vsext.vf2 v12, v10
78- ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
79- ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
72+ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
73+ ; CHECK-NEXT: vslidedown.vi v10, v8, 1
74+ ; CHECK-NEXT: vslideup.vi v10, v8, 2
8075; CHECK-NEXT: vmv.v.v v8, v10
8176; CHECK-NEXT: ret
8277 %s = shufflevector <4 x double > %x , <4 x double > poison, <4 x i32 > <i32 1 , i32 2 , i32 0 , i32 1 >
@@ -86,14 +81,9 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
8681define <4 x double > @vrgather_permute_shuffle_uv_v4f64 (<4 x double > %x ) {
8782; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
8883; CHECK: # %bb.0:
89- ; CHECK-NEXT: lui a0, 4096
90- ; CHECK-NEXT: addi a0, a0, 513
91- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
92- ; CHECK-NEXT: vmv.s.x v10, a0
93- ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
94- ; CHECK-NEXT: vsext.vf2 v12, v10
95- ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
96- ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
84+ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
85+ ; CHECK-NEXT: vslidedown.vi v10, v8, 1
86+ ; CHECK-NEXT: vslideup.vi v10, v8, 2
9787; CHECK-NEXT: vmv.v.v v8, v10
9888; CHECK-NEXT: ret
9989 %s = shufflevector <4 x double > poison, <4 x double > %x , <4 x i32 > <i32 5 , i32 6 , i32 4 , i32 5 >
@@ -103,13 +93,12 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
10393define <4 x double > @vrgather_shuffle_vv_v4f64 (<4 x double > %x , <4 x double > %y ) {
10494; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
10595; CHECK: # %bb.0:
106- ; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
107- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
108- ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
109- ; CHECK-NEXT: vle16.v v14, (a0)
96+ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
97+ ; CHECK-NEXT: vslidedown.vi v12, v8, 1
98+ ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
11099; CHECK-NEXT: vmv.v.i v0, 8
111- ; CHECK-NEXT: vsetvli zero, zero , e64, m2, ta, mu
112- ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
100+ ; CHECK-NEXT: vsetivli zero, 4 , e64, m2, ta, mu
101+ ; CHECK-NEXT: vslideup.vi v12, v8, 2
113102; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t
114103; CHECK-NEXT: vmv.v.v v8, v12
115104; CHECK-NEXT: ret
@@ -120,16 +109,18 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
120109define <4 x double > @vrgather_shuffle_xv_v4f64 (<4 x double > %x ) {
121110; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
122111; CHECK: # %bb.0:
112+ ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
113+ ; CHECK-NEXT: vmv.v.i v0, 8
123114; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
124115; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0)
125- ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
126- ; CHECK-NEXT: vid.v v10
127- ; CHECK-NEXT: vrsub.vi v12, v10, 4
116+ ; CHECK-NEXT: vmv2r.v v10, v8
117+ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
118+ ; CHECK-NEXT: vslideup.vi v10, v8, 2, v0.t
119+ ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
128120; CHECK-NEXT: vmv.v.i v0, 12
129- ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
130- ; CHECK-NEXT: vfmv.v.f v10, fa5
131- ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
132- ; CHECK-NEXT: vmv.v.v v8, v10
121+ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
122+ ; CHECK-NEXT: vfmv.v.f v8, fa5
123+ ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
133124; CHECK-NEXT: ret
134125 %s = shufflevector <4 x double > <double 2 .0 , double 2 .0 , double 2 .0 , double 2 .0 >, <4 x double > %x , <4 x i32 > <i32 0 , i32 3 , i32 6 , i32 5 >
135126 ret <4 x double > %s
@@ -138,17 +129,17 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
138129define <4 x double > @vrgather_shuffle_vx_v4f64 (<4 x double > %x ) {
139130; CHECK-LABEL: vrgather_shuffle_vx_v4f64:
140131; CHECK: # %bb.0:
132+ ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
133+ ; CHECK-NEXT: vmv.v.i v0, 2
141134; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
142135; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0)
143- ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
144- ; CHECK-NEXT: vmv.v.i v10, 9
145- ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
146- ; CHECK-NEXT: vcompress.vm v12, v8, v10
136+ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
137+ ; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t
147138; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
148139; CHECK-NEXT: vmv.v.i v0, 3
149140; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
150- ; CHECK-NEXT: vfmv.v.f v8 , fa5
151- ; CHECK-NEXT: vmerge.vvm v8, v8, v12 , v0
141+ ; CHECK-NEXT: vfmv.v.f v10 , fa5
142+ ; CHECK-NEXT: vmerge.vvm v8, v10, v8 , v0
152143; CHECK-NEXT: ret
153144 %s = shufflevector <4 x double > %x , <4 x double > <double 2 .0 , double 2 .0 , double 2 .0 , double 2 .0 >, <4 x i32 > <i32 0 , i32 3 , i32 6 , i32 5 >
154145 ret <4 x double > %s
@@ -311,13 +302,9 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
311302define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16 (<4 x bfloat> %x ) {
312303; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
313304; CHECK: # %bb.0:
314- ; CHECK-NEXT: lui a0, 4096
315- ; CHECK-NEXT: addi a0, a0, 513
316- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
317- ; CHECK-NEXT: vmv.s.x v9, a0
318- ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
319- ; CHECK-NEXT: vsext.vf2 v10, v9
320- ; CHECK-NEXT: vrgather.vv v9, v8, v10
305+ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
306+ ; CHECK-NEXT: vslidedown.vi v9, v8, 1
307+ ; CHECK-NEXT: vslideup.vi v9, v8, 2
321308; CHECK-NEXT: vmv1r.v v8, v9
322309; CHECK-NEXT: ret
323310 %s = shufflevector <4 x bfloat> %x , <4 x bfloat> poison, <4 x i32 > <i32 1 , i32 2 , i32 0 , i32 1 >
@@ -327,12 +314,10 @@ define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
327314define <4 x bfloat> @vrgather_shuffle_vv_v4bf16 (<4 x bfloat> %x , <4 x bfloat> %y ) {
328315; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
329316; CHECK: # %bb.0:
330- ; CHECK-NEXT: lui a0, %hi(.LCPI25_0)
331- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0)
332317; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
333- ; CHECK-NEXT: vle16.v v11, (a0)
318+ ; CHECK-NEXT: vslidedown.vi v10, v8, 1
334319; CHECK-NEXT: vmv.v.i v0, 8
335- ; CHECK-NEXT: vrgather.vv v10, v8, v11
320+ ; CHECK-NEXT: vslideup.vi v10, v8, 2
336321; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
337322; CHECK-NEXT: vmv1r.v v8, v10
338323; CHECK-NEXT: ret
@@ -355,13 +340,9 @@ define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
355340define <4 x half > @vrgather_permute_shuffle_vu_v4f16 (<4 x half > %x ) {
356341; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
357342; CHECK: # %bb.0:
358- ; CHECK-NEXT: lui a0, 4096
359- ; CHECK-NEXT: addi a0, a0, 513
360- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
361- ; CHECK-NEXT: vmv.s.x v9, a0
362- ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
363- ; CHECK-NEXT: vsext.vf2 v10, v9
364- ; CHECK-NEXT: vrgather.vv v9, v8, v10
343+ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
344+ ; CHECK-NEXT: vslidedown.vi v9, v8, 1
345+ ; CHECK-NEXT: vslideup.vi v9, v8, 2
365346; CHECK-NEXT: vmv1r.v v8, v9
366347; CHECK-NEXT: ret
367348 %s = shufflevector <4 x half > %x , <4 x half > poison, <4 x i32 > <i32 1 , i32 2 , i32 0 , i32 1 >
@@ -371,12 +352,10 @@ define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
371352define <4 x half > @vrgather_shuffle_vv_v4f16 (<4 x half > %x , <4 x half > %y ) {
372353; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
373354; CHECK: # %bb.0:
374- ; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
375- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
376355; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
377- ; CHECK-NEXT: vle16.v v11, (a0)
356+ ; CHECK-NEXT: vslidedown.vi v10, v8, 1
378357; CHECK-NEXT: vmv.v.i v0, 8
379- ; CHECK-NEXT: vrgather.vv v10, v8, v11
358+ ; CHECK-NEXT: vslideup.vi v10, v8, 2
380359; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
381360; CHECK-NEXT: vmv1r.v v8, v10
382361; CHECK-NEXT: ret
0 commit comments