@@ -9558,7 +9558,8 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
95589558 continue;
95599559 }
95609560 if ((LocalState.getAltOpcode() != LocalState.getOpcode() &&
9561- I->getOpcode() == LocalState.getOpcode()) ||
9561+ isMainInstruction(I, LocalState.getMainOp(), LocalState.getAltOp(),
9562+ *TLI)) ||
95629563 (LocalState.getAltOpcode() == LocalState.getOpcode() &&
95639564 !isAlternateInstruction(I, LocalState.getMainOp(),
95649565 LocalState.getAltOp(), *TLI))) {
@@ -9768,109 +9769,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
97689769 }
97699770 return true;
97709771 }
9771- <<<<<<< HEAD
9772- SmallVector<Value *> Op1, Op2;
9773- OrdersType ReorderIndices(VL.size(), VL.size());
9774- SmallBitVector Op1Indices(VL.size());
9775- for (auto [Idx, V] : enumerate(VL)) {
9776- auto *I = dyn_cast<Instruction>(V);
9777- if (!I) {
9778- Op1.push_back(V);
9779- Op1Indices.set(Idx);
9780- continue;
9781- }
9782- if ((LocalState.getAltOpcode() != LocalState.getOpcode() &&
9783- isMainInstruction(I, LocalState.getMainOp(), LocalState.getAltOp(),
9784- *TLI)) ||
9785- (LocalState.getAltOpcode() == LocalState.getOpcode() &&
9786- !isAlternateInstruction(I, LocalState.getMainOp(),
9787- LocalState.getAltOp(), *TLI))) {
9788- Op1.push_back(V);
9789- Op1Indices.set(Idx);
9790- continue;
9791- }
9792- Op2.push_back(V);
9793- }
9794- Type *ScalarTy = getValueType(VL.front());
9795- VectorType *VecTy = getWidenedType(ScalarTy, VL.size());
9796- unsigned Opcode0 = LocalState.getOpcode();
9797- unsigned Opcode1 = LocalState.getAltOpcode();
9798- SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
9799- // Enable split node, only if all nodes do not form legal alternate
9800- // instruction (like X86 addsub).
9801- SmallPtrSet<Value *, 4> UOp1(llvm::from_range, Op1);
9802- SmallPtrSet<Value *, 4> UOp2(llvm::from_range, Op2);
9803- if (UOp1.size() <= 1 || UOp2.size() <= 1 ||
9804- TTI.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask) ||
9805- !hasFullVectorsOrPowerOf2(TTI, Op1.front()->getType(), Op1.size()) ||
9806- !hasFullVectorsOrPowerOf2(TTI, Op2.front()->getType(), Op2.size()))
9807- return false;
9808- // Enable split node, only if all nodes are power-of-2/full registers.
9809- unsigned Op1Cnt = 0, Op2Cnt = Op1.size();
9810- for (unsigned Idx : seq<unsigned>(VL.size())) {
9811- if (Op1Indices.test(Idx)) {
9812- ReorderIndices[Op1Cnt] = Idx;
9813- ++Op1Cnt;
9814- } else {
9815- ReorderIndices[Op2Cnt] = Idx;
9816- ++Op2Cnt;
9817- }
9818- }
9819- if (isIdentityOrder(ReorderIndices))
9820- ReorderIndices.clear();
9821- SmallVector<int> Mask;
9822- if (!ReorderIndices.empty())
9823- inversePermutation(ReorderIndices, Mask);
9824- unsigned NumParts = TTI.getNumberOfParts(VecTy);
9825- VectorType *Op1VecTy = getWidenedType(ScalarTy, Op1.size());
9826- VectorType *Op2VecTy = getWidenedType(ScalarTy, Op2.size());
9827- // Check non-profitable single register ops, which better to be represented
9828- // as alternate ops.
9829- if (NumParts >= VL.size())
9830- return false;
9831- if ((LocalState.getMainOp()->isBinaryOp() &&
9832- LocalState.getAltOp()->isBinaryOp() &&
9833- (LocalState.isShiftOp() || LocalState.isBitwiseLogicOp() ||
9834- LocalState.isAddSubLikeOp() || LocalState.isMulDivLikeOp())) ||
9835- (LocalState.getMainOp()->isCast() && LocalState.getAltOp()->isCast()) ||
9836- (LocalState.getMainOp()->isUnaryOp() &&
9837- LocalState.getAltOp()->isUnaryOp())) {
9838- constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
9839- InstructionCost InsertCost = ::getShuffleCost(
9840- TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
9841- FixedVectorType *SubVecTy =
9842- getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
9843- InstructionCost NewShuffleCost =
9844- ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
9845- if (NumParts <= 1 && (Mask.empty() || InsertCost >= NewShuffleCost))
9846- return false;
9847- InstructionCost OriginalVecOpsCost =
9848- TTI.getArithmeticInstrCost(Opcode0, VecTy, Kind) +
9849- TTI.getArithmeticInstrCost(Opcode1, VecTy, Kind);
9850- SmallVector<int> OriginalMask(VL.size(), PoisonMaskElem);
9851- for (unsigned Idx : seq<unsigned>(VL.size())) {
9852- if (isa<PoisonValue>(VL[Idx]))
9853- continue;
9854- OriginalMask[Idx] = Idx + (Op1Indices.test(Idx) ? 0 : VL.size());
9855- }
9856- InstructionCost OriginalCost =
9857- OriginalVecOpsCost + ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc,
9858- VecTy, OriginalMask, Kind);
9859- InstructionCost NewVecOpsCost =
9860- TTI.getArithmeticInstrCost(Opcode0, Op1VecTy, Kind) +
9861- TTI.getArithmeticInstrCost(Opcode1, Op2VecTy, Kind);
9862- InstructionCost NewCost =
9863- NewVecOpsCost + InsertCost +
9864- (!VectorizableTree.empty() && VectorizableTree.front()->hasState() &&
9865- VectorizableTree.front()->getOpcode() == Instruction::Store
9866- ? NewShuffleCost
9867- : 0);
9868- // If not profitable to split - exit.
9869- if (NewCost >= OriginalCost)
9870- return false;
9871- }
9872- =======
9873- >>>>>>> upstream/main
98749772
98759773 SmallVector<Value *> NewVL(VL.size());
98769774 copy(Op1, NewVL.begin());
0 commit comments