@@ -895,6 +895,13 @@ class InstructionsState {
895895 is_contained(AddSub, getAltOpcode());
896896 }
897897
898+ /// Checks if main/alt instructions are cmp operations.
899+ bool isCmpOp() const {
900+ return (getOpcode() == Instruction::ICmp ||
901+ getOpcode() == Instruction::FCmp) &&
902+ getAltOpcode() == getOpcode();
903+ }
904+
898905 /// Checks if the current state is valid, i.e. has non-null MainOp
899906 bool valid() const { return MainOp && AltOp; }
900907
@@ -9277,22 +9284,23 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
92779284 // as alternate ops.
92789285 if (NumParts >= VL.size())
92799286 return false;
9287+ constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
9288+ InstructionCost InsertCost = ::getShuffleCost(
9289+ *TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
9290+ FixedVectorType *SubVecTy =
9291+ getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
9292+ InstructionCost NewShuffleCost =
9293+ ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
9294+ if (!LocalState.isCmpOp() && NumParts <= 1 &&
9295+ (Mask.empty() || InsertCost >= NewShuffleCost))
9296+ return false;
92809297 if ((LocalState.getMainOp()->isBinaryOp() &&
92819298 LocalState.getAltOp()->isBinaryOp() &&
92829299 (LocalState.isShiftOp() || LocalState.isBitwiseLogicOp() ||
92839300 LocalState.isAddSubLikeOp() || LocalState.isMulDivLikeOp())) ||
92849301 (LocalState.getMainOp()->isCast() && LocalState.getAltOp()->isCast()) ||
92859302 (LocalState.getMainOp()->isUnaryOp() &&
92869303 LocalState.getAltOp()->isUnaryOp())) {
9287- constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
9288- InstructionCost InsertCost = ::getShuffleCost(
9289- *TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
9290- FixedVectorType *SubVecTy =
9291- getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
9292- InstructionCost NewShuffleCost =
9293- ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
9294- if (NumParts <= 1 && (Mask.empty() || InsertCost >= NewShuffleCost))
9295- return false;
92969304 InstructionCost OriginalVecOpsCost =
92979305 TTI->getArithmeticInstrCost(Opcode0, VecTy, Kind) +
92989306 TTI->getArithmeticInstrCost(Opcode1, VecTy, Kind);
@@ -9429,18 +9437,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
94299437 if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
94309438 return false;
94319439
9432- // Any value is used in split node already - just gather.
9433- if (any_of(VL, [&](Value *V) {
9434- return ScalarsInSplitNodes.contains(V) || isVectorized(V);
9435- })) {
9436- if (TryToFindDuplicates(S)) {
9437- auto Invalid = ScheduleBundle::invalid();
9438- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9439- ReuseShuffleIndices);
9440- }
9441- return true;
9442- }
9443-
94449440 SmallVector<Value *> NewVL(VL.size());
94459441 copy(Op1, NewVL.begin());
94469442 copy(Op2, std::next(NewVL.begin(), Op1.size()));
@@ -9616,9 +9612,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96169612 ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, {}, Kind) +
96179613 ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Extracted,
96189614 /*Insert=*/false, /*Extract=*/true, Kind);
9619- InstructionCost ScalarizeCostEstimate =
9620- ::getScalarizationOverhead( *TTI, ScalarTy, VecTy, Vectorized,
9621- /*Insert=*/true, /*Extract=*/false, Kind);
9615+ InstructionCost ScalarizeCostEstimate = ::getScalarizationOverhead(
9616+ *TTI, ScalarTy, VecTy, Vectorized,
9617+ /*Insert=*/true, /*Extract=*/false, Kind, /*ForPoisonSrc=*/false );
96229618 PreferScalarize = VectorizeCostEstimate > ScalarizeCostEstimate;
96239619 }
96249620 if (PreferScalarize) {
0 commit comments