@@ -437,8 +437,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
437437; RV32-NEXT: vslidedown.vi v9, v8, 1
438438; RV32-NEXT: vmv.x.s a1, v9
439439; RV32-NEXT: vslidedown.vi v9, v8, 2
440- ; RV32-NEXT: vslidedown.vi v8, v8, 3
441440; RV32-NEXT: vmv.x.s a2, v9
441+ ; RV32-NEXT: vslidedown.vi v8, v8, 3
442442; RV32-NEXT: vmv.x.s a3, v8
443443; RV32-NEXT: add a1, a1, a2
444444; RV32-NEXT: add a0, a0, a3
@@ -452,8 +452,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
452452; RV64-NEXT: vslidedown.vi v9, v8, 1
453453; RV64-NEXT: vmv.x.s a1, v9
454454; RV64-NEXT: vslidedown.vi v9, v8, 2
455- ; RV64-NEXT: vslidedown.vi v8, v8, 3
456455; RV64-NEXT: vmv.x.s a2, v9
456+ ; RV64-NEXT: vslidedown.vi v8, v8, 3
457457; RV64-NEXT: vmv.x.s a3, v8
458458; RV64-NEXT: add a1, a1, a2
459459; RV64-NEXT: add a0, a0, a3
@@ -799,11 +799,8 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
799799define float @reduce_fadd_2xf32_non_associative (ptr %p ) {
800800; CHECK-LABEL: reduce_fadd_2xf32_non_associative:
801801; CHECK: # %bb.0:
802- ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
803- ; CHECK-NEXT: vle32.v v8, (a0)
804- ; CHECK-NEXT: vfmv.f.s fa5, v8
805- ; CHECK-NEXT: vslidedown.vi v8, v8, 1
806- ; CHECK-NEXT: vfmv.f.s fa4, v8
802+ ; CHECK-NEXT: flw fa5, 0(a0)
803+ ; CHECK-NEXT: flw fa4, 4(a0)
807804; CHECK-NEXT: fadd.s fa0, fa5, fa4
808805; CHECK-NEXT: ret
809806 %v = load <2 x float >, ptr %p , align 256
@@ -835,11 +832,8 @@ define float @reduce_fadd_2xf32_reassoc_only(ptr %p) {
835832define float @reduce_fadd_2xf32_ninf_only (ptr %p ) {
836833; CHECK-LABEL: reduce_fadd_2xf32_ninf_only:
837834; CHECK: # %bb.0:
838- ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
839- ; CHECK-NEXT: vle32.v v8, (a0)
840- ; CHECK-NEXT: vfmv.f.s fa5, v8
841- ; CHECK-NEXT: vslidedown.vi v8, v8, 1
842- ; CHECK-NEXT: vfmv.f.s fa4, v8
835+ ; CHECK-NEXT: flw fa5, 0(a0)
836+ ; CHECK-NEXT: flw fa4, 4(a0)
843837; CHECK-NEXT: fadd.s fa0, fa5, fa4
844838; CHECK-NEXT: ret
845839 %v = load <2 x float >, ptr %p , align 256
@@ -854,15 +848,13 @@ define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
854848define float @reduce_fadd_4xi32_non_associative (ptr %p ) {
855849; CHECK-LABEL: reduce_fadd_4xi32_non_associative:
856850; CHECK: # %bb.0:
857- ; CHECK-NEXT: vsetivli zero, 4 , e32, m1, ta, ma
851+ ; CHECK-NEXT: vsetivli zero, 3 , e32, m1, ta, ma
858852; CHECK-NEXT: vle32.v v8, (a0)
853+ ; CHECK-NEXT: flw fa5, 12(a0)
859854; CHECK-NEXT: lui a0, 524288
860855; CHECK-NEXT: vmv.s.x v9, a0
861- ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
862- ; CHECK-NEXT: vfredusum.vs v9, v8, v9
863- ; CHECK-NEXT: vslidedown.vi v8, v8, 3
864- ; CHECK-NEXT: vfmv.f.s fa5, v8
865- ; CHECK-NEXT: vfmv.f.s fa4, v9
856+ ; CHECK-NEXT: vfredusum.vs v8, v8, v9
857+ ; CHECK-NEXT: vfmv.f.s fa4, v8
866858; CHECK-NEXT: fadd.s fa0, fa4, fa5
867859; CHECK-NEXT: ret
868860 %v = load <4 x float >, ptr %p , align 256
@@ -881,15 +873,10 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
881873define float @reduce_fadd_4xi32_non_associative2 (ptr %p ) {
882874; CHECK-LABEL: reduce_fadd_4xi32_non_associative2:
883875; CHECK: # %bb.0:
884- ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
885- ; CHECK-NEXT: vle32.v v8, (a0)
886- ; CHECK-NEXT: vfmv.f.s fa5, v8
887- ; CHECK-NEXT: vslidedown.vi v9, v8, 1
888- ; CHECK-NEXT: vfmv.f.s fa4, v9
889- ; CHECK-NEXT: vslidedown.vi v9, v8, 2
890- ; CHECK-NEXT: vslidedown.vi v8, v8, 3
891- ; CHECK-NEXT: vfmv.f.s fa3, v9
892- ; CHECK-NEXT: vfmv.f.s fa2, v8
876+ ; CHECK-NEXT: flw fa5, 0(a0)
877+ ; CHECK-NEXT: flw fa4, 4(a0)
878+ ; CHECK-NEXT: flw fa3, 8(a0)
879+ ; CHECK-NEXT: flw fa2, 12(a0)
893880; CHECK-NEXT: fadd.s fa5, fa5, fa4
894881; CHECK-NEXT: fadd.s fa4, fa3, fa2
895882; CHECK-NEXT: fadd.s fa0, fa5, fa4
0 commit comments