
Commit d1113d2

fixup! be less conservative
1 parent 69291c0 commit d1113d2

4 files changed: +39, -56 lines


llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 7 additions & 11 deletions
@@ -947,6 +947,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
+static MachineOperand One = MachineOperand::CreateImm(1);
+
 bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
                                   MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
@@ -961,8 +963,9 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
     // Instructions like reductions may use a vector register as a scalar
     // register. In this case, we should treat it like a scalar register which
     // does not impact the decision on whether to optimize VL. But if there is
-    // another user of MI and it has VL=0, we need to be sure not to reduce the
-    // VL of MI to zero when the VLOp of UserOp is may be non-zero.
+    // another user of MI and it may have VL=0, we need to be sure not to reduce
+    // the VL of MI to zero when the VLOp of UserOp may be non-zero. The most
+    // we can reduce it to is one.
     if (isVectorOpUsedAsScalarOp(UserOp)) {
       [[maybe_unused]] Register R = UserOp.getReg();
       [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
@@ -973,16 +976,9 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL,
       unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
       const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
       if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0)) {
-        if (!CommonVL) {
-          CommonVL = &VLOp;
-          continue;
-        }
-        if (!CommonVL->isIdenticalTo(VLOp)) {
-          CanReduceVL = false;
-          break;
-        }
+        CommonVL = &One;
+        continue;
       }
-      continue;
     }
 
     if (mayReadPastVL(UserMI)) {
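
Context for the new static One operand: when a user consumes a vector register as a scalar operand (e.g. the start-value operand vs1 of vfredusum.vs), it reads only element 0 regardless of its own VL, so the producer's VL can always be shrunk to 1 but never to 0; per the RVV spec, an instruction run with VL=0 writes no elements, which would leave element 0 stale. A minimal sketch of the scenario in RVV assembly (register and value choices are illustrative, not taken from this commit's tests):

    # Producer of the scalar source: VL=1 suffices; VL=0 would skip the write.
    vsetivli zero, 1, e32, m1, ta, ma
    vfmv.s.f v9, fa0                  # writes fa0 into v9[0] only
    # Consumer: uses v9 as a scalar operand; reads v9[0] whatever its VL is.
    vsetivli zero, 4, e32, m1, ta, ma
    vfredusum.vs v8, v8, v9           # v8[0] = sum(v8[0..3]) + v9[0]

This is why the pass no longer needs a common VL across such users: an immediate 1 is a safe lower bound for all of them.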

llvm/test/CodeGen/RISCV/double_reduct.ll

Lines changed: 15 additions & 15 deletions
@@ -25,14 +25,14 @@ define float @fmul_f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v10, v8, 2
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    vslidedown.vi v10, v9, 2
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
 ; CHECK-NEXT:    vrgather.vi v10, v8, 1
 ; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    vrgather.vi v10, v9, 1
-; CHECK-NEXT:    vfmul.vv v9, v9, v10
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vslidedown.vi v8, v9, 2
+; CHECK-NEXT:    vfmul.vv v8, v9, v8
+; CHECK-NEXT:    vrgather.vi v9, v8, 1
+; CHECK-NEXT:    vfmul.vv v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fmul.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
@@ -130,14 +130,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    vslidedown.vi v10, v9, 2
-; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vrgather.vi v10, v8, 1
 ; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    vrgather.vi v10, v9, 1
-; RV32-NEXT:    vmul.vv v9, v9, v10
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vmv.x.s a1, v9
+; RV32-NEXT:    vslidedown.vi v8, v9, 2
+; RV32-NEXT:    vmul.vv v8, v9, v8
+; RV32-NEXT:    vrgather.vi v9, v8, 1
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -146,14 +146,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v10, v8, 2
 ; RV64-NEXT:    vmul.vv v8, v8, v10
-; RV64-NEXT:    vslidedown.vi v10, v9, 2
-; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vrgather.vi v10, v8, 1
 ; RV64-NEXT:    vmul.vv v8, v8, v10
-; RV64-NEXT:    vrgather.vi v10, v9, 1
-; RV64-NEXT:    vmul.vv v9, v9, v10
 ; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    vmv.x.s a1, v9
+; RV64-NEXT:    vslidedown.vi v8, v9, 2
+; RV64-NEXT:    vmul.vv v8, v9, v8
+; RV64-NEXT:    vrgather.vi v9, v8, 1
+; RV64-NEXT:    vmul.vv v8, v8, v9
+; RV64-NEXT:    vmv.x.s a1, v8
 ; RV64-NEXT:    mulw a0, a0, a1
 ; RV64-NEXT:    ret
   %r1 = call i32 @llvm.vector.reduce.mul.i32.v4i32(<4 x i32> %a)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll

Lines changed: 14 additions & 27 deletions
@@ -437,8 +437,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
 ; RV32-NEXT:    vslidedown.vi v9, v8, 1
 ; RV32-NEXT:    vmv.x.s a1, v9
 ; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32-NEXT:    vmv.x.s a2, v9
+; RV32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32-NEXT:    vmv.x.s a3, v8
 ; RV32-NEXT:    add a1, a1, a2
 ; RV32-NEXT:    add a0, a0, a3
@@ -452,8 +452,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
 ; RV64-NEXT:    vslidedown.vi v9, v8, 1
 ; RV64-NEXT:    vmv.x.s a1, v9
 ; RV64-NEXT:    vslidedown.vi v9, v8, 2
-; RV64-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-NEXT:    vmv.x.s a2, v9
+; RV64-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-NEXT:    vmv.x.s a3, v8
 ; RV64-NEXT:    add a1, a1, a2
 ; RV64-NEXT:    add a0, a0, a3
@@ -799,11 +799,8 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
 define float @reduce_fadd_2xf32_non_associative(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_2xf32_non_associative:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
+; CHECK-NEXT:    flw fa4, 4(a0)
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %p, align 256
@@ -835,11 +832,8 @@ define float @reduce_fadd_2xf32_reassoc_only(ptr %p) {
 define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_2xf32_ninf_only:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa4, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
+; CHECK-NEXT:    flw fa4, 4(a0)
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, ptr %p, align 256
@@ -854,15 +848,13 @@ define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
 define float @reduce_fadd_4xi32_non_associative(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_4xi32_non_associative:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    flw fa5, 12(a0)
 ; CHECK-NEXT:    lui a0, 524288
 ; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9
-; CHECK-NEXT:    vslidedown.vi v8, v8, 3
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fadd.s fa0, fa4, fa5
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, ptr %p, align 256
@@ -881,15 +873,10 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
 define float @reduce_fadd_4xi32_non_associative2(ptr %p) {
 ; CHECK-LABEL: reduce_fadd_4xi32_non_associative2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vslidedown.vi v9, v8, 1
-; CHECK-NEXT:    vfmv.f.s fa4, v9
-; CHECK-NEXT:    vslidedown.vi v9, v8, 2
-; CHECK-NEXT:    vslidedown.vi v8, v8, 3
-; CHECK-NEXT:    vfmv.f.s fa3, v9
-; CHECK-NEXT:    vfmv.f.s fa2, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
+; CHECK-NEXT:    flw fa4, 4(a0)
+; CHECK-NEXT:    flw fa3, 8(a0)
+; CHECK-NEXT:    flw fa2, 12(a0)
 ; CHECK-NEXT:    fadd.s fa5, fa5, fa4
 ; CHECK-NEXT:    fadd.s fa4, fa3, fa2
 ; CHECK-NEXT:    fadd.s fa0, fa5, fa4

llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll

Lines changed: 3 additions & 3 deletions
@@ -282,10 +282,10 @@ define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v10
-; CHECK-NEXT:    vfmv.s.f v10, fa1
-; CHECK-NEXT:    vfredusum.vs v9, v9, v10
 ; CHECK-NEXT:    vfmv.f.s fa5, v8
-; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfmv.s.f v8, fa1
+; CHECK-NEXT:    vfredusum.vs v8, v9, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v8
 ; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
 ; CHECK-NEXT:    ret
 entry:
