@@ -889,17 +889,12 @@ define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
889889; CHECK-NEXT: csrr a0, vlenb
890890; CHECK-NEXT: srli a0, a0, 3
891891; CHECK-NEXT: slli a1, a0, 1
892- ; CHECK-NEXT: add a1, a1, a0
893892; CHECK-NEXT: add a0, a1, a0
894- ; CHECK-NEXT: lui a2, 1048568
895- ; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
896- ; CHECK-NEXT: vmv.v.x v9, a2
897- ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
898- ; CHECK-NEXT: vslideup.vx v8, v9, a1
899- ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
893+ ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
900894; CHECK-NEXT: vfmv.s.f v9, fa0
901- ; CHECK-NEXT: vfredosum.vs v8, v8, v9
902- ; CHECK-NEXT: vfmv.f.s fa0, v8
895+ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
896+ ; CHECK-NEXT: vfredosum.vs v9, v8, v9
897+ ; CHECK-NEXT: vfmv.f.s fa0, v9
903898; CHECK-NEXT: ret
904899 %red = call half @llvm.vector.reduce.fadd.nxv3f16 (half %s , <vscale x 3 x half > %v )
905900 ret half %red
@@ -910,18 +905,15 @@ declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
910905define half @vreduce_ord_fadd_nxv6f16 (<vscale x 6 x half > %v , half %s ) {
911906; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
912907; CHECK: # %bb.0:
913- ; CHECK-NEXT: lui a0, 1048568
914- ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
915- ; CHECK-NEXT: vmv.v.x v10, a0
916908; CHECK-NEXT: csrr a0, vlenb
917- ; CHECK-NEXT: srli a0, a0, 2
918- ; CHECK-NEXT: add a1, a0, a0
919- ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
920- ; CHECK-NEXT: vslideup.vx v9, v10, a0
921- ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
909+ ; CHECK-NEXT: srli a1, a0, 3
910+ ; CHECK-NEXT: slli a1, a1, 1
911+ ; CHECK-NEXT: sub a0, a0, a1
912+ ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
922913; CHECK-NEXT: vfmv.s.f v10, fa0
923- ; CHECK-NEXT: vfredosum.vs v8, v8, v10
924- ; CHECK-NEXT: vfmv.f.s fa0, v8
914+ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
915+ ; CHECK-NEXT: vfredosum.vs v10, v8, v10
916+ ; CHECK-NEXT: vfmv.f.s fa0, v10
925917; CHECK-NEXT: ret
926918 %red = call half @llvm.vector.reduce.fadd.nxv6f16 (half %s , <vscale x 6 x half > %v )
927919 ret half %red
@@ -932,22 +924,15 @@ declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
932924define half @vreduce_ord_fadd_nxv10f16 (<vscale x 10 x half > %v , half %s ) {
933925; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
934926; CHECK: # %bb.0:
935- ; CHECK-NEXT: lui a0, 1048568
936- ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
937- ; CHECK-NEXT: vmv.v.x v12, a0
938927; CHECK-NEXT: csrr a0, vlenb
939- ; CHECK-NEXT: srli a0, a0, 2
940- ; CHECK-NEXT: add a1, a0, a0
941- ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
942- ; CHECK-NEXT: vslideup.vx v10, v12, a0
943- ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
944- ; CHECK-NEXT: vmv.v.v v11, v12
945- ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
946- ; CHECK-NEXT: vslideup.vx v11, v12, a0
947- ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
928+ ; CHECK-NEXT: srli a0, a0, 3
929+ ; CHECK-NEXT: li a1, 10
930+ ; CHECK-NEXT: mul a0, a0, a1
931+ ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
948932; CHECK-NEXT: vfmv.s.f v12, fa0
949- ; CHECK-NEXT: vfredosum.vs v8, v8, v12
950- ; CHECK-NEXT: vfmv.f.s fa0, v8
933+ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
934+ ; CHECK-NEXT: vfredosum.vs v12, v8, v12
935+ ; CHECK-NEXT: vfmv.f.s fa0, v12
951936; CHECK-NEXT: ret
952937 %red = call half @llvm.vector.reduce.fadd.nxv10f16 (half %s , <vscale x 10 x half > %v )
953938 ret half %red
@@ -958,13 +943,16 @@ declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
958943define half @vreduce_ord_fadd_nxv12f16 (<vscale x 12 x half > %v , half %s ) {
959944; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
960945; CHECK: # %bb.0:
961- ; CHECK-NEXT: lui a0, 1048568
962- ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
963- ; CHECK-NEXT: vmv.v.x v11, a0
946+ ; CHECK-NEXT: csrr a0, vlenb
947+ ; CHECK-NEXT: srli a0, a0, 3
948+ ; CHECK-NEXT: slli a1, a0, 2
949+ ; CHECK-NEXT: slli a0, a0, 4
950+ ; CHECK-NEXT: sub a0, a0, a1
951+ ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
964952; CHECK-NEXT: vfmv.s.f v12, fa0
965- ; CHECK-NEXT: vsetvli a0, zero , e16, m4, ta, ma
966- ; CHECK-NEXT: vfredosum.vs v8 , v8, v12
967- ; CHECK-NEXT: vfmv.f.s fa0, v8
953+ ; CHECK-NEXT: vsetvli zero, a0 , e16, m4, ta, ma
954+ ; CHECK-NEXT: vfredosum.vs v12 , v8, v12
955+ ; CHECK-NEXT: vfmv.f.s fa0, v12
968956; CHECK-NEXT: ret
969957 %red = call half @llvm.vector.reduce.fadd.nxv12f16 (half %s , <vscale x 12 x half > %v )
970958 ret half %red
@@ -977,17 +965,14 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
977965; CHECK-NEXT: csrr a0, vlenb
978966; CHECK-NEXT: srli a0, a0, 3
979967; CHECK-NEXT: slli a1, a0, 1
980- ; CHECK-NEXT: add a1, a1, a0
981968; CHECK-NEXT: add a0, a1, a0
982- ; CHECK-NEXT: lui a2, 1048568
983- ; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
984- ; CHECK-NEXT: vmv.v.x v9, a2
985- ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
986- ; CHECK-NEXT: vslideup.vx v8, v9, a1
987- ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
969+ ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
988970; CHECK-NEXT: vfmv.s.f v9, fa0
989- ; CHECK-NEXT: vfredusum.vs v8, v8, v9
990- ; CHECK-NEXT: vfmv.f.s fa0, v8
971+ ; CHECK-NEXT: lui a1, 1048568
972+ ; CHECK-NEXT: vmv.s.x v10, a1
973+ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
974+ ; CHECK-NEXT: vfredusum.vs v10, v8, v9
975+ ; CHECK-NEXT: vfmv.f.s fa0, v10
991976; CHECK-NEXT: ret
992977 %red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16 (half %s , <vscale x 3 x half > %v )
993978 ret half %red
@@ -996,18 +981,17 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
996981define half @vreduce_fadd_nxv6f16 (<vscale x 6 x half > %v , half %s ) {
997982; CHECK-LABEL: vreduce_fadd_nxv6f16:
998983; CHECK: # %bb.0:
999- ; CHECK-NEXT: lui a0, 1048568
1000- ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
1001- ; CHECK-NEXT: vmv.v.x v10, a0
1002984; CHECK-NEXT: csrr a0, vlenb
1003- ; CHECK-NEXT: srli a0, a0, 2
1004- ; CHECK-NEXT: add a1, a0, a0
1005- ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1006- ; CHECK-NEXT: vslideup.vx v9, v10, a0
1007- ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
985+ ; CHECK-NEXT: srli a1, a0, 3
986+ ; CHECK-NEXT: slli a1, a1, 1
987+ ; CHECK-NEXT: sub a0, a0, a1
988+ ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1008989; CHECK-NEXT: vfmv.s.f v10, fa0
1009- ; CHECK-NEXT: vfredusum.vs v8, v8, v10
1010- ; CHECK-NEXT: vfmv.f.s fa0, v8
990+ ; CHECK-NEXT: lui a1, 1048568
991+ ; CHECK-NEXT: vmv.s.x v11, a1
992+ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
993+ ; CHECK-NEXT: vfredusum.vs v11, v8, v10
994+ ; CHECK-NEXT: vfmv.f.s fa0, v11
1011995; CHECK-NEXT: ret
1012996 %red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16 (half %s , <vscale x 6 x half > %v )
1013997 ret half %red
0 commit comments