@@ -21722,6 +21722,8 @@ class HorizontalReduction {
2172221722 /// Checks if the optimization of original scalar identity operations on
2172321723 /// matched horizontal reductions is enabled and allowed.
2172421724 bool IsSupportedHorRdxIdentityOp = false;
21725+ /// The minimum number of the reduced values. The default is 3 when
/// VectorizeNonPowerOf2 is true and 4 otherwise; the constructor that takes
/// an explicit operand list initializes it to 2 instead.
21726+ const unsigned ReductionLimit = VectorizeNonPowerOf2 ? 3 : 4;
2172521727 /// Contains vector values for reduction including their scale factor and
2172621728 /// signedness.
2172721729 SmallVector<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales;
@@ -21740,7 +21742,8 @@ class HorizontalReduction {
2174021742 }
2174121743
2174221744 /// Checks if instruction is associative and can be vectorized.
21743- static bool isVectorizable(RecurKind Kind, Instruction *I) {
21745+ static bool isVectorizable(RecurKind Kind, Instruction *I,
21746+ bool TwoElementReduction = false) {
2174421747 if (Kind == RecurKind::None)
2174521748 return false;
2174621749
@@ -21749,6 +21752,10 @@ class HorizontalReduction {
2174921752 isBoolLogicOp(I))
2175021753 return true;
2175121754
21755+ // No need to check for associativity if there are only 2 reduced values.
21756+ if (TwoElementReduction)
21757+ return true;
21758+
2175221759 if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
2175321760 // FP min/max are associative except for NaN and -0.0. We do not
2175421761 // have to rule out -0.0 here because the intrinsic semantics do not
@@ -22020,6 +22027,27 @@ class HorizontalReduction {
2202022027
2202122028public:
2202222029 HorizontalReduction() = default;
/// Builds a reduction description directly from the root instruction \p I
/// and an explicit list of reduced values \p Ops, without searching for a
/// reduction tree. ReductionRoot is set to \p I and ReductionLimit to 2.
22030+ HorizontalReduction(Instruction *I, ArrayRef<Value *> Ops)
22031+ : ReductionRoot(I), ReductionLimit(2) {
// The reduction kind is derived from the root instruction itself.
22032+ RdxKind = HorizontalReduction::getRdxKind(I);
// Append a new entry to ReductionOps and store I in it.
22033+ ReductionOps.emplace_back().push_back(I);
// Append a new entry to ReducedVals holding a copy of Ops.
22034+ ReducedVals.emplace_back().assign(Ops.begin(), Ops.end());
// Record I as an operation associated with every reduced value.
22035+ for (Value *V : Ops)
22036+ ReducedValsToOps[V].push_back(I);
22037+ }
22038+
/// Checks whether the reduction described by ReductionRoot, RdxKind and
/// ReducedVals is accepted by isVectorizable(). ReductionRoot must already
/// be set (asserted).
/// \returns true if isVectorizable() accepts the root instruction.
22039+ bool matchReductionForOperands() const {
22040+ // Only the vectorizability of the root instruction is checked here; this
22041+ // function does not inspect the element type itself.
22042+ assert(ReductionRoot && "Reduction root is not set!");
// The third argument (TwoElementReduction) is true only when every entry of
// ReducedVals holds exactly 2 values.
22043+ if (!isVectorizable(RdxKind, cast<Instruction>(ReductionRoot),
22044+ all_of(ReducedVals, [](ArrayRef<Value *> Ops) {
22045+ return Ops.size() == 2;
22046+ })))
22047+ return false;
22048+
22049+ return true;
22050+ }
2202322051
2202422052 /// Try to find a reduction tree.
2202522053 bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
@@ -22187,7 +22215,6 @@ class HorizontalReduction {
2218722215 /// Attempt to vectorize the tree found by matchAssociativeReduction.
2218822216 Value *tryToReduce(BoUpSLP &V, const DataLayout &DL, TargetTransformInfo *TTI,
2218922217 const TargetLibraryInfo &TLI, AssumptionCache *AC) {
22190- const unsigned ReductionLimit = VectorizeNonPowerOf2 ? 3 : 4;
2219122218 constexpr unsigned RegMaxNumber = 4;
2219222219 constexpr unsigned RedValsMaxNumber = 128;
2219322220 // If there are a sufficient number of reduction values, reduce
@@ -23736,15 +23763,60 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
2373623763 Candidates.emplace_back(A1, B);
2373723764 }
2373823765
// Tries to handle Inst, with the reduced values Ops, as a horizontal
// reduction. Returns false as soon as any legality or cost check below
// fails; otherwise returns whether HorRdx.tryToReduce() produced a non-null
// value.
23766+ auto TryToReduce = [this, &R, &TTI = *TTI](Instruction *Inst,
23767+ ArrayRef<Value *> Ops) {
23768+ if (!isReductionCandidate(Inst))
23769+ return false;
// Reject result types that are not valid element types, and pointer types.
23770+ Type *Ty = Inst->getType();
23771+ if (!isValidElementType(Ty) || Ty->isPointerTy())
23772+ return false;
23773+ HorizontalReduction HorRdx(Inst, Ops);
23774+ if (!HorRdx.matchReductionForOperands())
23775+ return false;
23776+ // Check the cost of operations: compare a scalar estimate against the
// cost of a reduction over a vector with one lane per value in Ops.
23777+ VectorType *VecTy = getWidenedType(Ty, Ops.size());
23778+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// ScalarCost is the overhead of extracting every lane of VecTy (no
// inserts) plus the cost of Inst itself, both as reported by TTI.
23779+ InstructionCost ScalarCost =
23780+ TTI.getScalarizationOverhead(
23781+ VecTy, APInt::getAllOnes(getNumElements(VecTy)), /*Insert=*/false,
23782+ /*Extract=*/true, CostKind) +
23783+ TTI.getInstructionCost(Inst, CostKind);
23784+ InstructionCost RedCost;
// Only the recurrence kinds listed below are costed, via
// getArithmeticReductionCost(); any other kind bails out.
23785+ switch (::getRdxKind(Inst)) {
23786+ case RecurKind::Add:
23787+ case RecurKind::Mul:
23788+ case RecurKind::Or:
23789+ case RecurKind::And:
23790+ case RecurKind::Xor:
23791+ case RecurKind::FAdd:
23792+ case RecurKind::FMul: {
// FMF stays default-constructed unless Inst is an FPMathOperator, in
// which case its fast-math flags are used for the cost query.
23793+ FastMathFlags FMF;
23794+ if (auto *FPCI = dyn_cast<FPMathOperator>(Inst))
23795+ FMF = FPCI->getFastMathFlags();
23796+ RedCost = TTI.getArithmeticReductionCost(Inst->getOpcode(), VecTy, FMF,
23797+ CostKind);
23798+ break;
23799+ }
23800+ default:
23801+ return false;
23802+ }
// Proceed only when the reduction is strictly cheaper than the scalar
// estimate.
23803+ if (RedCost >= ScalarCost)
23804+ return false;
23805+
23806+ return HorRdx.tryToReduce(R, *DL, &TTI, *TLI, AC) != nullptr;
23807+ };
2373923808 if (Candidates.size() == 1)
23740- return tryToVectorizeList({Op0, Op1}, R);
23809+ return TryToReduce(I, {Op0, Op1}) || tryToVectorizeList({Op0, Op1}, R);
2374123810
2374223811 // We have multiple options. Try to pick the single best.
2374323812 std::optional<int> BestCandidate = R.findBestRootPair(Candidates);
2374423813 if (!BestCandidate)
2374523814 return false;
23746- return tryToVectorizeList(
23747- {Candidates[*BestCandidate].first, Candidates[*BestCandidate].second}, R);
23815+ return TryToReduce(I, {Candidates[*BestCandidate].first,
23816+ Candidates[*BestCandidate].second}) ||
23817+ tryToVectorizeList({Candidates[*BestCandidate].first,
23818+ Candidates[*BestCandidate].second},
23819+ R);
2374823820}
2374923821
2375023822bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Instruction *Root,
0 commit comments