Skip to content

Commit 07f6b77

Browse files
committed
Interpret VPWidenPointerInductionRecipe as a scalar instead
1 parent 8663143 commit 07f6b77

File tree

5 files changed

+41
-56
lines changed

5 files changed

+41
-56
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -956,10 +956,6 @@ class LoopVectorizationCostModel {
956956
return expectedCost(UserVF).isValid();
957957
}
958958

959-
/// \return True if maximizing vector bandwidth is enabled by the target or
960-
/// user options.
961-
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
962-
963959
/// \return The size (in bits) of the smallest and widest types in the code
964960
/// that needs to be vectorized. We ignore values that remain scalar such as
965961
/// 64 bit loop indices.
@@ -3922,14 +3918,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39223918
return FixedScalableVFPair::getNone();
39233919
}
39243920

3925-
bool LoopVectorizationCostModel::useMaxBandwidth(
3926-
TargetTransformInfo::RegisterKind RegKind) {
3927-
return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
3928-
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3929-
(UseWiderVFIfCallVariantsPresent &&
3930-
Legal->hasVectorCallVariants())));
3931-
}
3932-
39333921
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39343922
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39353923
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3995,7 +3983,10 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39953983
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39963984
: TargetTransformInfo::RGK_FixedWidthVector;
39973985
ElementCount MaxVF = MaxVectorElementCount;
3998-
if (useMaxBandwidth(RegKind)) {
3986+
if (MaximizeBandwidth ||
3987+
(MaximizeBandwidth.getNumOccurrences() == 0 &&
3988+
(TTI.shouldMaximizeVectorBandwidth(RegKind) ||
3989+
(UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
39993990
auto MaxVectorElementCountMaxBW = ElementCount::get(
40003991
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
40013992
ComputeScalableMaxVF);
@@ -4350,23 +4341,15 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43504341
for (auto &P : VPlans) {
43514342
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
43524343
P->vectorFactors().end());
4353-
SmallVector<VPRegisterUsage, 8> RUs;
4354-
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
4355-
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
4356-
RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
4357-
4358-
for (unsigned I = 0; I < VFs.size(); I++) {
4359-
ElementCount VF = VFs[I];
4344+
auto RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
4345+
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
43604346
// The cost for scalar VF=1 is already calculated, so ignore it.
43614347
if (VF.isScalar())
43624348
continue;
43634349

43644350
/// Don't consider the VF if it exceeds the number of registers for the
43654351
/// target.
4366-
if (CM.useMaxBandwidth(VF.isScalable()
4367-
? TargetTransformInfo::RGK_ScalableVector
4368-
: TargetTransformInfo::RGK_FixedWidthVector) &&
4369-
RUs[I].exceedsMaxNumRegs(TTI))
4352+
if (RU.exceedsMaxNumRegs(TTI))
43704353
continue;
43714354

43724355
InstructionCost C = CM.expectedCost(VF);
@@ -7113,14 +7096,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71137096
for (auto &P : VPlans) {
71147097
ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
71157098
P->vectorFactors().end());
7116-
7117-
SmallVector<VPRegisterUsage, 8> RUs;
7118-
if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
7119-
CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
7120-
RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
7121-
7122-
for (unsigned I = 0; I < VFs.size(); I++) {
7123-
ElementCount VF = VFs[I];
7099+
auto RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore);
7100+
for (auto [VF, RU] : zip_equal(VFs, RUs)) {
71247101
if (VF.isScalar())
71257102
continue;
71267103
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
@@ -7142,10 +7119,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71427119
InstructionCost Cost = cost(*P, VF);
71437120
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
71447121

7145-
if (CM.useMaxBandwidth(VF.isScalable()
7146-
? TargetTransformInfo::RGK_ScalableVector
7147-
: TargetTransformInfo::RGK_FixedWidthVector) &&
7148-
RUs[I].exceedsMaxNumRegs(TTI)) {
7122+
if (RU.exceedsMaxNumRegs(TTI)) {
71497123
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
71507124
<< VF << " because it uses too many registers\n");
71517125
continue;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -547,7 +547,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
547547

548548
if (VFs[J].isScalar() ||
549549
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
550-
VPScalarIVStepsRecipe>(R) ||
550+
VPScalarIVStepsRecipe, VPWidenPointerInductionRecipe>(R) ||
551551
(isa<VPInstruction>(R) &&
552552
all_of(cast<VPSingleDefRecipe>(R)->users(),
553553
[&](VPUser *U) {

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33

44
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
55
; CHECK-LABEL: add
6-
; CHECK: LV(REG): Found max usage: 2 item
6+
; CHECK: LV(REG): VF = 8
7+
; CHECK-NEXT: LV(REG): Found max usage: 2 item
78
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
89
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
910
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,14 @@
44

55
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
66
; CHECK-LABEL: add
7-
; ZVFH: LV(REG): Found max usage: 2 item
7+
; ZVFH: LV(REG): VF = 8
8+
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
89
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
910
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
1011
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
1112
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
12-
; ZVFHMIN: LV(REG): Found max usage: 2 item
13+
; ZVFHMIN: LV(REG): VF = 8
14+
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
1315
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
1416
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
1517
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll

Lines changed: 24 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -28,24 +28,28 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
2828
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
2929
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
3030
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
31-
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
31+
; CHECK-LMUL1: LV(REG): VF = 2
32+
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
3233
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
3334
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3435
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
3536
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
36-
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
37+
; CHECK-LMUL2: LV(REG): VF = 4
38+
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
3739
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
38-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
40+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3941
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
4042
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
41-
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
43+
; CHECK-LMUL4: LV(REG): VF = 8
44+
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
4245
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
43-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
46+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
4447
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
4548
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
46-
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
49+
; CHECK-LMUL8: LV(REG): VF = 16
50+
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
4751
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
48-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
52+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
4953
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
5054
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
5155

@@ -76,17 +80,21 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
7680
; CHECK-SCALAR: LV(REG): VF = 1
7781
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
7882
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
79-
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
80-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
81-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
82-
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
83-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
83+
; CHECK-LMUL1: LV(REG): VF = 2
84+
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
85+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
86+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
87+
; CHECK-LMUL2: LV(REG): VF = 4
88+
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
89+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
8490
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
85-
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
86-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
91+
; CHECK-LMUL4: LV(REG): VF = 8
92+
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
93+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
8794
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
88-
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
89-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
95+
; CHECK-LMUL8: LV(REG): VF = 16
96+
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
97+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
9098
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
9199
entry:
92100
%cmp3 = icmp sgt i32 %n, 0

0 commit comments

Comments
 (0)