Skip to content

Commit a91da31

Browse files
fhahntru
authored and committed
[LV] Vectorize maxnum/minnum w/o fast-math flags. (#148239)
Update LV to vectorize maxnum/minnum reductions without fast-math flags by adding an extra check in the loop for whether any input to maxnum/minnum is NaN; the check is needed because of how maxnum/minnum behave w.r.t. signaling NaNs. Signed zeros are already handled consistently by maxnum/minnum.

If any input is NaN:
* exit the vector loop,
* compute the reduction result up to the vector iteration that contained NaN inputs, and
* resume in the scalar loop.

New recurrence kinds are added for reductions using maxnum/minnum without fast-math flags.

PR: #148239
1 parent dbe3ba0 commit a91da31

16 files changed

+731
-58
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ enum class RecurKind {
4747
FMul, ///< Product of floats.
4848
FMin, ///< FP min implemented in terms of select(cmp()).
4949
FMax, ///< FP max implemented in terms of select(cmp()).
50+
FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
51+
FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
5052
FMinimum, ///< FP min with llvm.minimum semantics
5153
FMaximum, ///< FP max with llvm.maximum semantics
5254
FMinimumNum, ///< FP min with llvm.minimumnum semantics
@@ -250,6 +252,7 @@ class RecurrenceDescriptor {
250252
/// Returns true if the recurrence kind is a floating-point min/max kind.
251253
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
252254
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
255+
Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
253256
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
254257
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
255258
}

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
941941
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
942942
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
943943
};
944-
if (isIntMinMaxRecurrenceKind(Kind) ||
945-
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
944+
if (isIntMinMaxRecurrenceKind(Kind))
946945
return isMinMaxPattern(I, Kind, Prev);
947-
else if (isFMulAddIntrinsic(I))
946+
if (isFPMinMaxRecurrenceKind(Kind)) {
947+
InstDesc Res = isMinMaxPattern(I, Kind, Prev);
948+
if (!Res.isRecurrence())
949+
return InstDesc(false, I);
950+
if (HasRequiredFMF())
951+
return Res;
952+
// We may be able to vectorize FMax/FMin reductions using maxnum/minnum
953+
// intrinsics with extra checks ensuring the vector loop handles only
954+
// non-NaN inputs.
955+
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) {
956+
assert(Kind == RecurKind::FMax &&
957+
"unexpected recurrence kind for maxnum");
958+
return InstDesc(I, RecurKind::FMaxNum);
959+
}
960+
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) {
961+
assert(Kind == RecurKind::FMin &&
962+
"unexpected recurrence kind for minnum");
963+
return InstDesc(I, RecurKind::FMinNum);
964+
}
965+
return InstDesc(false, I);
966+
}
967+
if (isFMulAddIntrinsic(I))
948968
return InstDesc(Kind == RecurKind::FMulAdd, I,
949969
I->hasAllowReassoc() ? nullptr : I);
950970
return InstDesc(false, I);

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
938938
case RecurKind::UMin:
939939
return Intrinsic::vector_reduce_umin;
940940
case RecurKind::FMax:
941+
case RecurKind::FMaxNum:
941942
return Intrinsic::vector_reduce_fmax;
942943
case RecurKind::FMin:
944+
case RecurKind::FMinNum:
943945
return Intrinsic::vector_reduce_fmin;
944946
case RecurKind::FMaximum:
945947
return Intrinsic::vector_reduce_fmaximum;
@@ -1037,8 +1039,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
10371039
case RecurKind::SMax:
10381040
return Intrinsic::smax;
10391041
case RecurKind::FMin:
1042+
case RecurKind::FMinNum:
10401043
return Intrinsic::minnum;
10411044
case RecurKind::FMax:
1045+
case RecurKind::FMaxNum:
10421046
return Intrinsic::maxnum;
10431047
case RecurKind::FMinimum:
10441048
return Intrinsic::minimum;
@@ -1096,9 +1100,9 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
10961100
Value *Right) {
10971101
Type *Ty = Left->getType();
10981102
if (Ty->isIntOrIntVectorTy() ||
1099-
(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
1103+
(RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
1104+
RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
11001105
RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
1101-
// TODO: Add float minnum/maxnum support when FMF nnan is set.
11021106
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
11031107
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
11041108
"rdx.minmax");
@@ -1308,6 +1312,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
13081312
case RecurKind::UMin:
13091313
case RecurKind::FMax:
13101314
case RecurKind::FMin:
1315+
case RecurKind::FMinNum:
1316+
case RecurKind::FMaxNum:
13111317
case RecurKind::FMinimum:
13121318
case RecurKind::FMaximum:
13131319
case RecurKind::FMinimumNum:

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ class VPBuilder {
230230

231231
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
232232
/// and \p B.
233-
/// TODO: add createFCmp when needed.
234233
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
235234
DebugLoc DL = DebugLoc::getUnknown(),
236235
const Twine &Name = "") {
@@ -240,6 +239,17 @@ class VPBuilder {
240239
new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
241240
}
242241

242+
/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
243+
/// and \p B.
244+
VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
245+
DebugLoc DL = DebugLoc::getUnknown(),
246+
const Twine &Name = "") {
247+
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
248+
Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
249+
return tryInsertInstruction(
250+
new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
251+
}
252+
243253
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
244254
DebugLoc DL = DebugLoc::getUnknown(),
245255
const Twine &Name = "") {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4345,10 +4345,14 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43454345

43464346
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
43474347
ElementCount VF) const {
4348-
// Cross iteration phis such as reductions need special handling and are
4349-
// currently unsupported.
4350-
if (any_of(OrigLoop->getHeader()->phis(),
4351-
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
4348+
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
4349+
// reductions need special handling and are currently unsupported.
4350+
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
4351+
if (!Legal->isReductionVariable(&Phi))
4352+
return Legal->isFixedOrderRecurrence(&Phi);
4353+
RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4354+
return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
4355+
}))
43524356
return false;
43534357

43544358
// Phis with uses outside of the loop require special handling and are
@@ -8817,6 +8821,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88178821
// Adjust the recipes for any inloop reductions.
88188822
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
88198823

8824+
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
8825+
// NaNs if possible, bail out otherwise.
8826+
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
8827+
*Plan))
8828+
return nullptr;
8829+
88208830
// Transform recipes to abstract recipes if it is legal and beneficial and
88218831
// clamp the range for better cost estimation.
88228832
// TODO: Enable following transform when the EVL-version of extended-reduction

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23196,6 +23196,8 @@ class HorizontalReduction {
2319623196
case RecurKind::FindFirstIVUMin:
2319723197
case RecurKind::FindLastIVSMax:
2319823198
case RecurKind::FindLastIVUMax:
23199+
case RecurKind::FMaxNum:
23200+
case RecurKind::FMinNum:
2319923201
case RecurKind::FMaximumNum:
2320023202
case RecurKind::FMinimumNum:
2320123203
case RecurKind::None:
@@ -23333,6 +23335,8 @@ class HorizontalReduction {
2333323335
case RecurKind::FindFirstIVUMin:
2333423336
case RecurKind::FindLastIVSMax:
2333523337
case RecurKind::FindLastIVUMax:
23338+
case RecurKind::FMaxNum:
23339+
case RecurKind::FMinNum:
2333623340
case RecurKind::FMaximumNum:
2333723341
case RecurKind::FMinimumNum:
2333823342
case RecurKind::None:
@@ -23435,6 +23439,8 @@ class HorizontalReduction {
2343523439
case RecurKind::FindFirstIVUMin:
2343623440
case RecurKind::FindLastIVSMax:
2343723441
case RecurKind::FindLastIVUMax:
23442+
case RecurKind::FMaxNum:
23443+
case RecurKind::FMinNum:
2343823444
case RecurKind::FMaximumNum:
2343923445
case RecurKind::FMinimumNum:
2344023446
case RecurKind::None:

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
8484
return ResTy;
8585
}
8686
case Instruction::ICmp:
87+
case Instruction::FCmp:
8788
case VPInstruction::ActiveLaneMask:
8889
assert(inferScalarType(R->getOperand(0)) ==
8990
inferScalarType(R->getOperand(1)) &&

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,3 +628,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
628628
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
629629
}
630630
}
631+
632+
bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
633+
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
634+
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
635+
RedPhiR->getBackedgeValue()->getDefiningRecipe());
636+
if (!MinMaxR)
637+
return nullptr;
638+
639+
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
640+
if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
641+
!(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
642+
return nullptr;
643+
644+
#ifndef NDEBUG
645+
Intrinsic::ID RdxIntrinsicId =
646+
RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
647+
: Intrinsic::minnum;
648+
assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
649+
cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
650+
RdxIntrinsicId) ||
651+
(RepR &&
652+
cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() ==
653+
RdxIntrinsicId) &&
654+
"Intrinsic did not match recurrence kind");
655+
#endif
656+
657+
if (MinMaxR->getOperand(0) == RedPhiR)
658+
return MinMaxR->getOperand(1);
659+
660+
assert(MinMaxR->getOperand(1) == RedPhiR &&
661+
"Reduction phi operand expected");
662+
return MinMaxR->getOperand(0);
663+
};
664+
665+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
666+
VPReductionPHIRecipe *RedPhiR = nullptr;
667+
bool HasUnsupportedPhi = false;
668+
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
669+
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
670+
continue;
671+
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
672+
if (!Cur) {
673+
// TODO: Also support fixed-order recurrence phis.
674+
HasUnsupportedPhi = true;
675+
continue;
676+
}
677+
// For now, only a single reduction is supported.
678+
// TODO: Support multiple MaxNum/MinNum reductions and other reductions.
679+
if (RedPhiR)
680+
return false;
681+
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
682+
Cur->getRecurrenceKind() != RecurKind::FMinNum) {
683+
HasUnsupportedPhi = true;
684+
continue;
685+
}
686+
RedPhiR = Cur;
687+
}
688+
689+
if (!RedPhiR)
690+
return true;
691+
692+
// We won't be able to resume execution in the scalar tail, if there are
693+
// unsupported header phis or there is no scalar tail at all, due to
694+
// tail-folding.
695+
if (HasUnsupportedPhi || !Plan.hasScalarTail())
696+
return false;
697+
698+
VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
699+
if (!MinMaxOp)
700+
return false;
701+
702+
RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
703+
assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
704+
"unsupported reduction");
705+
706+
/// Check if the vector loop of \p Plan can early exit and restart
707+
/// execution of last vector iteration in the scalar loop. This requires all
708+
/// recipes up to the early exit point be side-effect free as they are
709+
/// re-executed. Currently we check that the loop is free of any recipe that
710+
/// may write to memory. Expected to operate on an early VPlan w/o nested
711+
/// regions.
712+
for (VPBlockBase *VPB : vp_depth_first_shallow(
713+
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
714+
auto *VPBB = cast<VPBasicBlock>(VPB);
715+
for (auto &R : *VPBB) {
716+
if (R.mayWriteToMemory() &&
717+
!match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
718+
return false;
719+
}
720+
}
721+
722+
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
723+
VPBuilder Builder(LatchVPBB->getTerminator());
724+
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
725+
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
726+
"Unexpected terminator");
727+
auto *IsLatchExitTaken =
728+
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
729+
LatchExitingBranch->getOperand(1));
730+
731+
VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
732+
VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
733+
auto *AnyExitTaken =
734+
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
735+
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
736+
LatchExitingBranch->eraseFromParent();
737+
738+
// If we exit early due to NaNs, compute the final reduction result based on
739+
// the reduction phi at the beginning of the last vector iteration.
740+
auto *RdxResult = find_singleton<VPSingleDefRecipe>(
741+
RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
742+
auto *VPI = dyn_cast<VPInstruction>(U);
743+
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
744+
return VPI;
745+
return nullptr;
746+
});
747+
748+
auto *MiddleVPBB = Plan.getMiddleBlock();
749+
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
750+
auto *NewSel =
751+
Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
752+
RdxResult->setOperand(1, NewSel);
753+
754+
auto *ScalarPH = Plan.getScalarPreheader();
755+
// Update resume phis for inductions in the scalar preheader. If AnyNaN is
756+
// true, the resume from the start of the last vector iteration via the
757+
// canonical IV, otherwise from the original value.
758+
for (auto &R : ScalarPH->phis()) {
759+
auto *ResumeR = cast<VPPhi>(&R);
760+
VPValue *VecV = ResumeR->getOperand(0);
761+
if (VecV == RdxResult)
762+
continue;
763+
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
764+
if (DerivedIV->getNumUsers() == 1 &&
765+
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
766+
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
767+
&Plan.getVectorTripCount());
768+
DerivedIV->moveAfter(&*Builder.getInsertPoint());
769+
DerivedIV->setOperand(1, NewSel);
770+
continue;
771+
}
772+
}
773+
// Bail out and abandon the current, partially modified, VPlan if we
774+
// encounter resume phi that cannot be updated yet.
775+
if (VecV != &Plan.getVectorTripCount()) {
776+
LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
777+
"FMaxNum/FMinNum reduction.\n");
778+
return false;
779+
}
780+
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
781+
ResumeR->setOperand(0, NewSel);
782+
}
783+
784+
auto *MiddleTerm = MiddleVPBB->getTerminator();
785+
Builder.setInsertPoint(MiddleTerm);
786+
VPValue *MiddleCond = MiddleTerm->getOperand(0);
787+
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
788+
MiddleTerm->setOperand(0, NewCond);
789+
return true;
790+
}

0 commit comments

Comments
 (0)