@@ -874,27 +874,30 @@ define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
 define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: reverse_v32i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    addi a1, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v12, a1
-; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    vrsub.vx v10, v10, a2
+; CHECK-NEXT:    lui a2, 16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v8, v12
-; CHECK-NEXT:    vrgatherei16.vv v14, v9, v12
+; CHECK-NEXT:    vrgatherei16.vv v15, v8, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v12, v10
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    addi a2, a2, -1
+; CHECK-NEXT:    vrsub.vi v10, v10, 15
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v17, v13, v10
+; CHECK-NEXT:    vrgather.vv v16, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vmv.s.x v0, a2
 ; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    addi a0, a0, -32
-; CHECK-NEXT:    vrsub.vi v12, v8, 15
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v14, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i8> %res
@@ -943,23 +946,25 @@ define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
 define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: reverse_v16i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v13, v12, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    li a1, 255
 ; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v14, v8, 7
+; CHECK-NEXT:    vrgather.vv v15, v8, v9
+; CHECK-NEXT:    vrgather.vv v14, v10, v9
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v14, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x i16> %res
@@ -1024,24 +1029,27 @@ define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, <2 x i32> %b) {
 define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: reverse_v8i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
+; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vrsub.vi v14, v8, 3
+; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vrsub.vx v10, v12, a1
+; CHECK-NEXT:    vrgather.vv v13, v8, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
+; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x i32> %res
@@ -1197,23 +1205,25 @@ define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
 define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: reverse_v16f16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v13, v12, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    li a1, 255
 ; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v14, v8, 7
+; CHECK-NEXT:    vrgather.vv v15, v8, v9
+; CHECK-NEXT:    vrgather.vv v14, v10, v9
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v14, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x half> %res
@@ -1269,24 +1279,27 @@ define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
 define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: reverse_v8f32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
+; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vrsub.vi v14, v8, 3
+; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vrsub.vx v10, v12, a1
+; CHECK-NEXT:    vrgather.vv v13, v8, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
+; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x float> %res