-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[LV] Vectorize FMax via OrderedFCmpSelect w/o fast-math flags. #146711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9d28282
5675396
ec473e5
caae126
92ebac1
25a1f39
d50a372
43ff8ed
a3ae508
52b72e7
88e581e
b92dde2
e59025f
d3a4ca9
6605452
32baf05
02156d9
6793b20
f579116
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -819,7 +819,8 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, | |
if (match(I, m_OrdOrUnordFMin(m_Value(), m_Value()))) | ||
return InstDesc(Kind == RecurKind::FMin, I); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only max is handled by OrderedFCmpSelect, not min? Can start w/ FMaxOGT only. |
||
if (match(I, m_OrdOrUnordFMax(m_Value(), m_Value()))) | ||
return InstDesc(Kind == RecurKind::FMax, I); | ||
return InstDesc( | ||
Kind == RecurKind::FMax || Kind == RecurKind::OrderedFCmpSelect, I); | ||
if (match(I, m_FMinNum(m_Value(), m_Value()))) | ||
return InstDesc(Kind == RecurKind::FMin, I); | ||
if (match(I, m_FMaxNum(m_Value(), m_Value()))) | ||
|
@@ -962,6 +963,14 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( | |
"unexpected recurrence kind for minnum"); | ||
return InstDesc(I, RecurKind::FMinNum); | ||
} | ||
if (Kind == RecurKind::FMax || Kind == RecurKind::OrderedFCmpSelect) { | ||
if (isa<SelectInst>(I)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SelectInst need not check that its comparison is ordered and isMinMaxPattern() like FCmpInst below? |
||
return InstDesc(I, RecurKind::OrderedFCmpSelect); | ||
auto *Cmp = dyn_cast<FCmpInst>(I); | ||
if (Cmp && FCmpInst::isOrdered(Cmp->getPredicate()) && | ||
isMinMaxPattern(I, Kind, Prev).isRecurrence()) | ||
return InstDesc(I, RecurKind::OrderedFCmpSelect); | ||
} | ||
Comment on lines
+966
to
+973
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deserves a comment like the one above for maxnum/minnum handling first non-NaN portion on input? |
||
return InstDesc(false, I); | ||
} | ||
if (isFMulAddIntrinsic(I)) | ||
|
@@ -1227,6 +1236,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { | |
case RecurKind::UMin: | ||
return Instruction::ICmp; | ||
case RecurKind::FMax: | ||
case RecurKind::OrderedFCmpSelect: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Placed between FMax and FMin? |
||
case RecurKind::FMin: | ||
case RecurKind::FMaximum: | ||
case RecurKind::FMinimum: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -937,6 +937,7 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) { | |
return Intrinsic::vector_reduce_umax; | ||
case RecurKind::UMin: | ||
return Intrinsic::vector_reduce_umin; | ||
case RecurKind::OrderedFCmpSelect: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is for max, returning Intrinsic::vector_reduce_fmax; need a separate case for min, returning Intrinsic::vector_reduce_fmin as below? |
||
case RecurKind::FMax: | ||
case RecurKind::FMaxNum: | ||
return Intrinsic::vector_reduce_fmax; | ||
|
@@ -1088,6 +1089,7 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { | |
return CmpInst::ICMP_SGT; | ||
case RecurKind::FMin: | ||
return CmpInst::FCMP_OLT; | ||
case RecurKind::OrderedFCmpSelect: | ||
case RecurKind::FMax: | ||
return CmpInst::FCMP_OGT; | ||
// We do not add FMinimum/FMaximum recurrence kind here since there is no | ||
|
@@ -1310,6 +1312,7 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src, | |
case RecurKind::SMin: | ||
case RecurKind::UMax: | ||
case RecurKind::UMin: | ||
case RecurKind::OrderedFCmpSelect: | ||
case RecurKind::FMax: | ||
case RecurKind::FMin: | ||
Comment on lines
+1315
to
1317
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps better placed after fast/simple FMax/FMin, right before complex FMaxNum/FMinNum. |
||
case RecurKind::FMinNum: | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -654,7 +654,105 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, | |||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { | ||||||||||||||||||||||||||||||||||||||||||||
static bool handleOrderedFCmpSelect(VPlan &Plan, | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth documenting how MaxNum/MinNum and FMaxOGT/FMinOLT are handled, along with examples demonstrating their challenges and how the latter processes the final horizontal reduction to fix its +0/-0 discrepancy by recording first/last argmax/argmin (or sign of first/last zero), and how the former reverts to scalar loop when encountering NaN's (rather than trying a similar fix?). |
||||||||||||||||||||||||||||||||||||||||||||
VPReductionPHIRecipe *RedPhiR) { | ||||||||||||||||||||||||||||||||||||||||||||
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); | ||||||||||||||||||||||||||||||||||||||||||||
VPWidenIntOrFpInductionRecipe *WideIV = nullptr; | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// MaxOp feeding the reduction phi must be a select (either wide or a | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Names refer to max only. Would be good to clarify parts that apply to min as well, if any. |
||||||||||||||||||||||||||||||||||||||||||||
// replicate recipe), where the phi is the last operand, and the compare | ||||||||||||||||||||||||||||||||||||||||||||
// predicate is strict. This ensures NaNs won't get propagated unless the | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Strictness implies tie-breaker of signed zeroes is first zero, non-strict implies last zero. Ordered comparison ensures NaNs won't get propagated if initial value is not a NaN, and if it is, the result will be NaN. Can the Unordered case (and non-strict until supported by LastIndex) be handled by continuing with the scalar epilog when a NaN is encountered, as in FMaxNum/FMinNum? Or does the +0/-0 tie breaking prevent that solution, or require bailing out if either a NaN or a zero is encountered? |
||||||||||||||||||||||||||||||||||||||||||||
// initial value is NaN | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Comment on lines
+662
to
+665
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. May be easier to spell out the pattern, e.g.,
|
||||||||||||||||||||||||||||||||||||||||||||
auto *MaxOp = dyn_cast<VPRecipeWithIRFlags>( | ||||||||||||||||||||||||||||||||||||||||||||
RedPhiR->getBackedgeValue()->getDefiningRecipe()); | ||||||||||||||||||||||||||||||||||||||||||||
if (!MaxOp) | ||||||||||||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||||||||||||
auto *RepR = dyn_cast<VPReplicateRecipe>(MaxOp); | ||||||||||||||||||||||||||||||||||||||||||||
if (!isa<VPWidenSelectRecipe>(MaxOp) && | ||||||||||||||||||||||||||||||||||||||||||||
!(RepR && (isa<SelectInst>(RepR->getUnderlyingInstr())))) | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
auto *Cmp = cast<VPRecipeWithIRFlags>(MaxOp->getOperand(0)); | ||||||||||||||||||||||||||||||||||||||||||||
if (MaxOp->getOperand(1) == RedPhiR || | ||||||||||||||||||||||||||||||||||||||||||||
!CmpInst::isStrictPredicate(Cmp->getPredicate())) | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better also make sure the operands of the strict Cmp are in the expected order - same as in the select MaxOp rather than flipped, and that Cmp isOrdered? I.e., OGT or OLT, excluding UGT and ULT. |
||||||||||||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) { | ||||||||||||||||||||||||||||||||||||||||||||
// We need a wide canonical IV | ||||||||||||||||||||||||||||||||||||||||||||
if (auto *CurIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) { | ||||||||||||||||||||||||||||||||||||||||||||
if (CurIV->isCanonical()) { | ||||||||||||||||||||||||||||||||||||||||||||
WideIV = CurIV; | ||||||||||||||||||||||||||||||||||||||||||||
break; | ||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// A wide canonical IV is currently required. | ||||||||||||||||||||||||||||||||||||||||||||
// TODO: Create an induction if no suitable existing one is available. | ||||||||||||||||||||||||||||||||||||||||||||
if (!WideIV) | ||||||||||||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||||||||||||
Comment on lines
+690
to
+693
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that a scalar canonical IV always exists, and is unique. But widened ones may exist (last one found is used?) or not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, at this stage, all inductions will still be widened, but may not be canonical. |
||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// Create a reduction that tracks the first indices where the latest maximum | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
// value has been selected. This is later used to select the max value from | ||||||||||||||||||||||||||||||||||||||||||||
// the partial reductions in a way that correctly handles signed zeros and | ||||||||||||||||||||||||||||||||||||||||||||
// NaNs in the input. | ||||||||||||||||||||||||||||||||||||||||||||
Comment on lines
+697
to
+698
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(Neither NaNs nor non-zero numbers require the tracked indices.) |
||||||||||||||||||||||||||||||||||||||||||||
// Note that we do not need to check if the induction may hit the sentinel | ||||||||||||||||||||||||||||||||||||||||||||
// value. If the sentinel value gets hit, the final reduction value is at the | ||||||||||||||||||||||||||||||||||||||||||||
// last index or the maximum was never set and all lanes contain the start | ||||||||||||||||||||||||||||||||||||||||||||
// value. In either case, the correct value is selected. | ||||||||||||||||||||||||||||||||||||||||||||
Comment on lines
+699
to
+702
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tried to elaborate, is this clear/correct/helpful?
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
unsigned IVWidth = | ||||||||||||||||||||||||||||||||||||||||||||
VPTypeAnalysis(Plan).inferScalarType(WideIV)->getScalarSizeInBits(); | ||||||||||||||||||||||||||||||||||||||||||||
LLVMContext &Ctx = Plan.getScalarHeader()->getIRBasicBlock()->getContext(); | ||||||||||||||||||||||||||||||||||||||||||||
VPValue *UMinSentinel = | ||||||||||||||||||||||||||||||||||||||||||||
Plan.getOrAddLiveIn(ConstantInt::get(Ctx, APInt::getMaxValue(IVWidth))); | ||||||||||||||||||||||||||||||||||||||||||||
auto *IdxPhi = new VPReductionPHIRecipe(nullptr, RecurKind::FindFirstIVUMin, | ||||||||||||||||||||||||||||||||||||||||||||
*UMinSentinel, false, false, 1); | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth commenting constant parameters. |
||||||||||||||||||||||||||||||||||||||||||||
IdxPhi->insertBefore(RedPhiR); | ||||||||||||||||||||||||||||||||||||||||||||
auto *MinIdxSel = new VPInstruction(Instruction::Select, | ||||||||||||||||||||||||||||||||||||||||||||
{MaxOp->getOperand(0), WideIV, IdxPhi}); | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
MinIdxSel->insertAfter(MaxOp); | ||||||||||||||||||||||||||||||||||||||||||||
IdxPhi->addOperand(MinIdxSel); | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// Find the first index holding the maximum value. This is used to | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
// extract the lane with the final max value and is needed to handle signed | ||||||||||||||||||||||||||||||||||||||||||||
// zeros and NaNs in the input. | ||||||||||||||||||||||||||||||||||||||||||||
Comment on lines
+717
to
+718
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
auto *MaxResult = find_singleton<VPSingleDefRecipe>( | ||||||||||||||||||||||||||||||||||||||||||||
RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * { | ||||||||||||||||||||||||||||||||||||||||||||
auto *VPI = dyn_cast<VPInstruction>(U); | ||||||||||||||||||||||||||||||||||||||||||||
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult) | ||||||||||||||||||||||||||||||||||||||||||||
return VPI; | ||||||||||||||||||||||||||||||||||||||||||||
return nullptr; | ||||||||||||||||||||||||||||||||||||||||||||
}); | ||||||||||||||||||||||||||||||||||||||||||||
VPBuilder Builder(MaxResult->getParent(), | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||
std::next(MaxResult->getIterator())); | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// Create mask for lanes that have the max value and use it to mask out | ||||||||||||||||||||||||||||||||||||||||||||
// indices that don't contain maximum values. | ||||||||||||||||||||||||||||||||||||||||||||
auto *MaskFinalMaxValue = Builder.createNaryOp( | ||||||||||||||||||||||||||||||||||||||||||||
Instruction::FCmp, {MaxResult->getOperand(1), MaxResult}, | ||||||||||||||||||||||||||||||||||||||||||||
VPIRFlags(CmpInst::FCMP_OEQ)); | ||||||||||||||||||||||||||||||||||||||||||||
auto *IndicesWithMaxValue = Builder.createNaryOp( | ||||||||||||||||||||||||||||||||||||||||||||
Instruction::Select, {MaskFinalMaxValue, MinIdxSel, UMinSentinel}); | ||||||||||||||||||||||||||||||||||||||||||||
auto *FirstMaxIdx = Builder.createNaryOp( | ||||||||||||||||||||||||||||||||||||||||||||
VPInstruction::ComputeFindIVResult, | ||||||||||||||||||||||||||||||||||||||||||||
{IdxPhi, WideIV->getStartValue(), UMinSentinel, IndicesWithMaxValue}); | ||||||||||||||||||||||||||||||||||||||||||||
// Convert the index of the first max value to an index in the vector lanes of | ||||||||||||||||||||||||||||||||||||||||||||
// the partial reduction results. This ensures we select the first max value | ||||||||||||||||||||||||||||||||||||||||||||
// and acts as a tie-breaker if the partial reductions contain signed zeros. | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The vertical computation of each partial reduction result takes care of NaNs and signed zeroes, it is only the horizontal reduction of these vector lanes that require tie-breaking, to handle potential signed zeroes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the tie-breaking is only needed to handle signed zeroes when computing the final reduction results. Consider a final partial reduction vector with We then compare the partial reduction values to the result of the horizontal reduction (-0.0 == +0.0 will also be true, selecting all lanes with zeros of any signed-ness) Out of those, we select the one encountered first using FindFirstIV. Note that this only works for strict predicates. |
||||||||||||||||||||||||||||||||||||||||||||
auto *FirstMaxLane = | ||||||||||||||||||||||||||||||||||||||||||||
Builder.createNaryOp(Instruction::URem, {FirstMaxIdx, &Plan.getVFxUF()}); | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// Extract the final max value and update the users. | ||||||||||||||||||||||||||||||||||||||||||||
auto *Res = Builder.createNaryOp(VPInstruction::ExtractLane, | ||||||||||||||||||||||||||||||||||||||||||||
{FirstMaxLane, MaxResult->getOperand(1)}); | ||||||||||||||||||||||||||||||||||||||||||||
MaxResult->replaceUsesWithIf(Res, [MaskFinalMaxValue](VPUser &U, unsigned) { | ||||||||||||||||||||||||||||||||||||||||||||
return &U != MaskFinalMaxValue; | ||||||||||||||||||||||||||||||||||||||||||||
}); | ||||||||||||||||||||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
bool VPlanTransforms::handleMaxMinNumAndOrderedFCmpSelectReductions( | ||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better retain |
||||||||||||||||||||||||||||||||||||||||||||
VPlan &Plan) { | ||||||||||||||||||||||||||||||||||||||||||||
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * { | ||||||||||||||||||||||||||||||||||||||||||||
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>( | ||||||||||||||||||||||||||||||||||||||||||||
RedPhiR->getBackedgeValue()->getDefiningRecipe()); | ||||||||||||||||||||||||||||||||||||||||||||
|
@@ -703,7 +801,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { | |||||||||||||||||||||||||||||||||||||||||||
if (RedPhiR) | ||||||||||||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||||||||||||
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum && | ||||||||||||||||||||||||||||||||||||||||||||
Cur->getRecurrenceKind() != RecurKind::FMinNum) { | ||||||||||||||||||||||||||||||||||||||||||||
Cur->getRecurrenceKind() != RecurKind::FMinNum && | ||||||||||||||||||||||||||||||||||||||||||||
Cur->getRecurrenceKind() != RecurKind::OrderedFCmpSelect) { | ||||||||||||||||||||||||||||||||||||||||||||
HasUnsupportedPhi = true; | ||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||
|
@@ -713,6 +812,15 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { | |||||||||||||||||||||||||||||||||||||||||||
if (!RedPhiR) | ||||||||||||||||||||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
if (HasUnsupportedPhi) | ||||||||||||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
if (RedPhiR->getRecurrenceKind() == RecurKind::OrderedFCmpSelect) | ||||||||||||||||||||||||||||||||||||||||||||
return handleOrderedFCmpSelect(Plan, RedPhiR); | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// Try to update the vector loop to exit early if any input is NaN and resume | ||||||||||||||||||||||||||||||||||||||||||||
// executing in the scalar loop to handle the NaNs there. | ||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||
// We won't be able to resume execution in the scalar tail, if there are | ||||||||||||||||||||||||||||||||||||||||||||
// unsupported header phis or there is no scalar tail at all, due to | ||||||||||||||||||||||||||||||||||||||||||||
// tail-folding. | ||||||||||||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -103,11 +103,10 @@ struct VPlanTransforms { | |||||||||
/// not valid. | ||||||||||
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); | ||||||||||
|
||||||||||
/// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do, | ||||||||||
/// try to update the vector loop to exit early if any input is NaN and resume | ||||||||||
/// executing in the scalar loop to handle the NaNs there. Return false if | ||||||||||
/// this attempt was unsuccessful. | ||||||||||
static bool handleMaxMinNumReductions(VPlan &Plan); | ||||||||||
/// Check if \p Plan contains any FMaxNum, FMinNum or OrderedFCmpSelect reductions. If they do, | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Suggested change
|
||||||||||
/// try to update the vector loop to account for NaNs and signed zeros as | ||||||||||
/// needed. | ||||||||||
static bool handleMaxMinNumAndOrderedFCmpSelectReductions(VPlan &Plan); | ||||||||||
|
||||||||||
/// Clear NSW/NUW flags from reduction instructions if necessary. | ||||||||||
static void clearReductionWrapFlags(VPlan &Plan); | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Continuing to look for good name(s), how about:
Have distinct cases for max vs. min, as in other cases, or fuse them into one FMinMax?
Rename existing FMax/FMin to FMaxFast/FMinFast?
NaN's are handled the same by OrderedFCmpSelect and by FMax/FMin-with-OGT/OLT as their compare predicate, where the latter assume all elements are non-NaN by FMF; the distinction lies in tie breaking +0/-0?
Need to also indicate if +0/-0 ties are broken according to the sign of the first zero or of last zero, which can be solved by computing index of first max/min or index of last max/min, respectively. Cases FMaxOGE/FMinOLE can be added later to denote +0/-0 ties are broken by last index rather than first index.
("Users need to handle ..." also applies to FMaxNum/FMinNum, suffice for enum to state semantics rather than how to keep them, there and here.)