Skip to content

Commit 8de22c0

Browse files
committed
Revert "Interpret VPWidenPointerInductionRecipe as a scalar instead"
This reverts commit 760eb570768fa32f36dc64be73eb770bd27b691f.
1 parent 07f6b77 commit 8de22c0

File tree

5 files changed: 57 additions and 41 deletions.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 37 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -956,6 +956,10 @@ class LoopVectorizationCostModel {
956956
return expectedCost(UserVF).isValid();
957957
}
958958

959+
/// \return True if maximizing vector bandwidth is enabled by the target or
960+
/// user options.
961+
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
962+
959963
/// \return The size (in bits) of the smallest and widest types in the code
960964
/// that needs to be vectorized. We ignore values that remain scalar such as
961965
/// 64 bit loop indices.
@@ -3918,6 +3922,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39183922
return FixedScalableVFPair::getNone();
39193923
}
39203924

3925+
bool LoopVectorizationCostModel::useMaxBandwidth(
3926+
TargetTransformInfo::RegisterKind RegKind) {
3927+
return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
3928+
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3929+
(UseWiderVFIfCallVariantsPresent &&
3930+
Legal->hasVectorCallVariants())));
3931+
}
3932+
39213933
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39223934
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39233935
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3983,10 +3995,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39833995
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39843996
: TargetTransformInfo::RGK_FixedWidthVector;
39853997
ElementCount MaxVF = MaxVectorElementCount;
3986-
if (MaximizeBandwidth ||
3987-
(MaximizeBandwidth.getNumOccurrences() == 0 &&
3988-
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3989-
(UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
3998+
if (useMaxBandwidth(RegKind)) {
39903999
auto MaxVectorElementCountMaxBW = ElementCount::get(
39914000
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
39924001
ComputeScalableMaxVF);
@@ -4341,15 +4350,24 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43414350
for (auto &P : VPlans) {
43424351
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
43434352
P->vectorFactors().end());
4344-
auto RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
4345-
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
4353+
4354+
SmallVector<VPRegisterUsage, 8> RUs;
4355+
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
4356+
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
4357+
RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
4358+
4359+
for (unsigned I = 0; I < VFs.size(); I++) {
4360+
ElementCount VF = VFs[I];
43464361
// The cost for scalar VF=1 is already calculated, so ignore it.
43474362
if (VF.isScalar())
43484363
continue;
43494364

43504365
/// Don't consider the VF if it exceeds the number of registers for the
43514366
/// target.
4352-
if (RU.exceedsMaxNumRegs(TTI))
4367+
if (CM.useMaxBandwidth(VF.isScalable()
4368+
? TargetTransformInfo::RGK_ScalableVector
4369+
: TargetTransformInfo::RGK_FixedWidthVector) &&
4370+
RUs[I].exceedsMaxNumRegs(TTI))
43534371
continue;
43544372

43554373
InstructionCost C = CM.expectedCost(VF);
@@ -7096,8 +7114,14 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70967114
for (auto &P : VPlans) {
70977115
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
70987116
P->vectorFactors().end());
7099-
auto RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
7100-
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
7117+
7118+
SmallVector<VPRegisterUsage, 8> RUs;
7119+
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
7120+
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
7121+
RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
7122+
7123+
for (unsigned I = 0; I < VFs.size(); I++) {
7124+
ElementCount VF = VFs[I];
71017125
if (VF.isScalar())
71027126
continue;
71037127
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
@@ -7119,7 +7143,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71197143
InstructionCost Cost = cost(*P, VF);
71207144
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
71217145

7122-
if (RU.exceedsMaxNumRegs(TTI)) {
7146+
if (CM.useMaxBandwidth(VF.isScalable()
7147+
? TargetTransformInfo::RGK_ScalableVector
7148+
: TargetTransformInfo::RGK_FixedWidthVector) &&
7149+
RUs[I].exceedsMaxNumRegs(TTI)) {
71237150
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
71247151
<< VF << " because it uses too many registers\n");
71257152
continue;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -547,7 +547,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
547547

548548
if (VFs[J].isScalar() ||
549549
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
550-
VPScalarIVStepsRecipe, VPWidenPointerInductionRecipe>(R) ||
550+
VPScalarIVStepsRecipe>(R) ||
551551
(isa<VPInstruction>(R) &&
552552
all_of(cast<VPSingleDefRecipe>(R)->users(),
553553
[&](VPUser *U) {

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33

44
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
55
; CHECK-LABEL: add
6-
; CHECK: LV(REG): VF = 8
7-
; CHECK-NEXT: LV(REG): Found max usage: 2 item
6+
; CHECK: LV(REG): Found max usage: 2 item
87
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
98
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
109
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,12 @@
44

55
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
66
; CHECK-LABEL: add
7-
; ZVFH: LV(REG): VF = 8
8-
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
7+
; ZVFH: LV(REG): Found max usage: 2 item
98
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
109
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
1110
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
1211
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
13-
; ZVFHMIN: LV(REG): VF = 8
14-
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
12+
; ZVFHMIN: LV(REG): Found max usage: 2 item
1513
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
1614
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1715
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -28,28 +28,24 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
2828
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
2929
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
3030
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
31-
; CHECK-LMUL1: LV(REG): VF = 2
32-
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
31+
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
3332
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
3433
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3534
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
3635
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
37-
; CHECK-LMUL2: LV(REG): VF = 4
38-
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
36+
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
3937
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
40-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
38+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
4139
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
4240
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
43-
; CHECK-LMUL4: LV(REG): VF = 8
44-
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
41+
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
4542
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
46-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
43+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
4744
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
4845
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
49-
; CHECK-LMUL8: LV(REG): VF = 16
50-
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
46+
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
5147
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
52-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
48+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
5349
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
5450
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
5551

@@ -80,21 +76,17 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
8076
; CHECK-SCALAR: LV(REG): VF = 1
8177
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
8278
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
83-
; CHECK-LMUL1: LV(REG): VF = 2
84-
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
85-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
86-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
87-
; CHECK-LMUL2: LV(REG): VF = 4
88-
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
89-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
79+
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
80+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
81+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
82+
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
83+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
9084
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
91-
; CHECK-LMUL4: LV(REG): VF = 8
92-
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
93-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
85+
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
86+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
9487
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
95-
; CHECK-LMUL8: LV(REG): VF = 16
96-
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
97-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
88+
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
89+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
9890
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
9991
entry:
10092
%cmp3 = icmp sgt i32 %n, 0

0 commit comments

Comments
 (0)