Skip to content

Commit 8663143

Browse files
committed
Only compute reg usage if needed
1 parent 95543d1 commit 8663143

File tree

4 files changed

+36
-36
lines changed

4 files changed

+36
-36
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4350,8 +4350,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43504350
for (auto &P : VPlans) {
43514351
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
43524352
P->vectorFactors().end());
4353-
auto RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
4354-
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
4353+
SmallVector<VPRegisterUsage, 8> RUs;
4354+
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
4355+
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
4356+
RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
4357+
4358+
for (unsigned I = 0; I < VFs.size(); I++) {
4359+
ElementCount VF = VFs[I];
43554360
// The cost for scalar VF=1 is already calculated, so ignore it.
43564361
if (VF.isScalar())
43574362
continue;
@@ -4361,7 +4366,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43614366
if (CM.useMaxBandwidth(VF.isScalable()
43624367
? TargetTransformInfo::RGK_ScalableVector
43634368
: TargetTransformInfo::RGK_FixedWidthVector) &&
4364-
RU.exceedsMaxNumRegs(TTI))
4369+
RUs[I].exceedsMaxNumRegs(TTI))
43654370
continue;
43664371

43674372
InstructionCost C = CM.expectedCost(VF);
@@ -7108,8 +7113,14 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71087113
for (auto &P : VPlans) {
71097114
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
71107115
P->vectorFactors().end());
7111-
auto RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
7112-
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
7116+
7117+
SmallVector<VPRegisterUsage, 8> RUs;
7118+
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
7119+
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
7120+
RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
7121+
7122+
for (unsigned I = 0; I < VFs.size(); I++) {
7123+
ElementCount VF = VFs[I];
71137124
if (VF.isScalar())
71147125
continue;
71157126
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
@@ -7134,7 +7145,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71347145
if (CM.useMaxBandwidth(VF.isScalable()
71357146
? TargetTransformInfo::RGK_ScalableVector
71367147
: TargetTransformInfo::RGK_FixedWidthVector) &&
7137-
RU.exceedsMaxNumRegs(TTI)) {
7148+
RUs[I].exceedsMaxNumRegs(TTI)) {
71387149
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
71397150
<< VF << " because it uses too many registers\n");
71407151
continue;

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33

44
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
55
; CHECK-LABEL: add
6-
; CHECK: LV(REG): VF = 8
7-
; CHECK-NEXT: LV(REG): Found max usage: 2 item
6+
; CHECK: LV(REG): Found max usage: 2 item
87
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
98
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
109
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,12 @@
44

55
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
66
; CHECK-LABEL: add
7-
; ZVFH: LV(REG): VF = 8
8-
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
7+
; ZVFH: LV(REG): Found max usage: 2 item
98
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
109
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
1110
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
1211
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
13-
; ZVFHMIN: LV(REG): VF = 8
14-
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
12+
; ZVFHMIN: LV(REG): Found max usage: 2 item
1513
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
1614
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1715
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -28,28 +28,24 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
2828
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
2929
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
3030
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
31-
; CHECK-LMUL1: LV(REG): VF = 2
32-
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
31+
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
3332
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
3433
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3534
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
3635
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
37-
; CHECK-LMUL2: LV(REG): VF = 4
38-
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
36+
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
3937
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
40-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
38+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
4139
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
4240
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
43-
; CHECK-LMUL4: LV(REG): VF = 8
44-
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
41+
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
4542
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
46-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
43+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
4744
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
4845
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
49-
; CHECK-LMUL8: LV(REG): VF = 16
50-
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
46+
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
5147
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
52-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
48+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
5349
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
5450
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
5551

@@ -80,21 +76,17 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
8076
; CHECK-SCALAR: LV(REG): VF = 1
8177
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
8278
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
83-
; CHECK-LMUL1: LV(REG): VF = 2
84-
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
85-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
86-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
87-
; CHECK-LMUL2: LV(REG): VF = 4
88-
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
89-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
79+
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
80+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
81+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
82+
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
83+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
9084
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
91-
; CHECK-LMUL4: LV(REG): VF = 8
92-
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
93-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
85+
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
86+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
9487
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
95-
; CHECK-LMUL8: LV(REG): VF = 16
96-
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
97-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
88+
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
89+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
9890
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
9991
entry:
10092
%cmp3 = icmp sgt i32 %n, 0

0 commit comments

Comments
 (0)