@@ -2019,6 +2019,9 @@ class BoUpSLP {
20192019
20202020 /// A vector of operand vectors.
20212021 SmallVector<OperandDataVec, 4> OpsVec;
2022+ /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
2023+ /// is not IntrinsicInst, ArgSize is User::getNumOperands.
2024+ unsigned ArgSize = 0;
20222025
20232026 const TargetLibraryInfo &TLI;
20242027 const DataLayout &DL;
@@ -2402,14 +2405,15 @@ class BoUpSLP {
24022405 }
24032406
24042407 /// Go through the instructions in VL and append their operands.
2405- void appendOperandsOfVL(ArrayRef<Value *> VL) {
2408+ void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
24062409 assert(!VL.empty() && "Bad VL");
24072410 assert((empty() || VL.size() == getNumLanes()) &&
24082411 "Expected same number of lanes");
2412+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2413+ // arguments to the intrinsic produces the same result.
24092414 constexpr unsigned IntrinsicNumOperands = 2;
2410- auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2411- unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2412- : VL0->getNumOperands();
2415+ unsigned NumOperands = VL0->getNumOperands();
2416+ ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
24132417 OpsVec.resize(NumOperands);
24142418 unsigned NumLanes = VL.size();
24152419 for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2442,7 +2446,7 @@ class BoUpSLP {
24422446 }
24432447
24442448 /// \returns the number of operands.
2445- unsigned getNumOperands() const { return OpsVec.size(); }
2449+ unsigned getNumOperands() const { return ArgSize; }
24462450
24472451 /// \returns the number of lanes.
24482452 unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2543,13 +2547,11 @@ class BoUpSLP {
25432547
25442548 public:
25452549 /// Initialize with all the operands of the instruction vector \p RootVL.
2546- VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
2550+ VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
25472551 : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
2548- L(R.LI->getLoopFor(
2549- (cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
2550- ->getParent()))) {
2552+ L(R.LI->getLoopFor((VL0->getParent()))) {
25512553 // Append all the operands of RootVL.
2552- appendOperandsOfVL(RootVL);
2554- appendOperandsOfVL(RootVL, VL0);
25532555 }
25542556
25552557 /// \Returns a value vector with the operands across all lanes for the
@@ -2623,7 +2625,8 @@ class BoUpSLP {
26232625 ArrayRef<OperandData> Op0 = OpsVec.front();
26242626 for (const OperandData &Data : Op0)
26252627 UniqueValues.insert(Data.V);
2626- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2628+ for (ArrayRef<OperandData> Op :
2629+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
26272630 if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26282631 return !UniqueValues.contains(Data.V);
26292632 }))
@@ -3144,13 +3147,6 @@ class BoUpSLP {
31443147 SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31453148 8> &GatheredLoads);
31463149
3147- /// Reorder commutative or alt operands to get better probability of
3148- /// generating vectorized code.
3149- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3150- SmallVectorImpl<Value *> &Left,
3151- SmallVectorImpl<Value *> &Right,
3152- const BoUpSLP &R);
3153-
31543150 /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31553151 /// users of \p TE and collects the stores. It returns the map from the store
31563152 /// pointers to the collected stores.
@@ -3345,27 +3341,13 @@ class BoUpSLP {
33453341 copy(OpVL, Operands[OpIdx].begin());
33463342 }
33473343
3348- /// Set the operands of this bundle in their original order.
3349- void setOperandsInOrder() {
3350- assert(Operands.empty() && "Already initialized?");
3351- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3352- Operands.resize(I0->getNumOperands());
3353- unsigned NumLanes = Scalars.size();
3354- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3355- OpIdx != NumOperands; ++OpIdx) {
3356- Operands[OpIdx].resize(NumLanes);
3357- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3358- if (isa<PoisonValue>(Scalars[Lane])) {
3359- Operands[OpIdx][Lane] =
3360- PoisonValue::get(I0->getOperand(OpIdx)->getType());
3361- continue;
3362- }
3363- auto *I = cast<Instruction>(Scalars[Lane]);
3364- assert(I->getNumOperands() == NumOperands &&
3365- "Expected same number of operands");
3366- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3367- }
3368- }
3344+ /// Set this bundle's operand from Scalars.
3345+ void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
3346+ VLOperands Ops(Scalars, MainOp, R);
3347+ if (RequireReorder)
3348+ Ops.reorder();
3349+ for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
3350+ setOperand(I, Ops.getVL(I));
33693351 }
33703352
33713353 /// Reorders operands of the node to the given mask \p Mask.
@@ -8471,7 +8453,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84718453 {}, CurrentOrder);
84728454 LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84738455
8474- TE->setOperandsInOrder();
8456+ TE->setOperand(*this);
84758457 buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84768458 return;
84778459 }
@@ -8492,27 +8474,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84928474 LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84938475 else
84948476 LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8495- TE->setOperandsInOrder();
84968477 break;
84978478 case TreeEntry::StridedVectorize:
84988479 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
84998480 TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
85008481 UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8501- TE->setOperandsInOrder();
85028482 LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
85038483 break;
85048484 case TreeEntry::ScatterVectorize:
85058485 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
85068486 TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
85078487 UserTreeIdx, ReuseShuffleIndices);
8508- TE->setOperandsInOrder();
8509- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85108488 LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
85118489 break;
85128490 case TreeEntry::CombinedVectorize:
85138491 case TreeEntry::NeedToGather:
85148492 llvm_unreachable("Unexpected loads state.");
85158493 }
8494+ TE->setOperand(*this);
8495+ if (State == TreeEntry::ScatterVectorize)
8496+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85168497 return;
85178498 }
85188499 case Instruction::ZExt:
@@ -8550,8 +8531,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85508531 ReuseShuffleIndices);
85518532 LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85528533
8553- TE->setOperandsInOrder();
8554- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8534+ TE->setOperand(*this);
8535+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85558536 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85568537 if (ShuffleOrOp == Instruction::Trunc) {
85578538 ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8578,12 +8559,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85788559 LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85798560
85808561 ValueList Left, Right;
8562+ VLOperands Ops(VL, VL0, *this);
85818563 if (cast<CmpInst>(VL0)->isCommutative()) {
85828564 // Commutative predicate - collect + sort operands of the instructions
85838565 // so that each side is more likely to have the same opcode.
85848566 assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85858567 "Commutative Predicate mismatch");
8586- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8568+ Ops.reorder();
8569+ Left = Ops.getVL(0);
8570+ Right = Ops.getVL(1);
85878571 } else {
85888572 // Collect operands - commute if it uses the swapped predicate.
85898573 for (Value *V : VL) {
@@ -8644,20 +8628,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86448628 ReuseShuffleIndices);
86458629 LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86468630
8647- // Sort operands of the instructions so that each side is more likely to
8648- // have the same opcode.
8649- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8650- ValueList Left, Right;
8651- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8652- TE->setOperand(0, Left);
8653- TE->setOperand(1, Right);
8654- buildTree_rec(Left, Depth + 1, {TE, 0});
8655- buildTree_rec(Right, Depth + 1, {TE, 1});
8656- return;
8657- }
8658-
8659- TE->setOperandsInOrder();
8660- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8631+ TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8632+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86618633 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86628634 return;
86638635 }
@@ -8722,7 +8694,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87228694 fixupOrderingIndices(CurrentOrder);
87238695 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87248696 ReuseShuffleIndices, CurrentOrder);
8725- TE->setOperandsInOrder();
8697+ TE->setOperand(*this);
87268698 buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
87278699 if (Consecutive)
87288700 LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8738,46 +8710,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87388710
87398711 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87408712 ReuseShuffleIndices);
8741- // Sort operands of the instructions so that each side is more likely to
8742- // have the same opcode.
8743- if (isCommutative(VL0)) {
8744- ValueList Left, Right;
8745- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8746- TE->setOperand(0, Left);
8747- TE->setOperand(1, Right);
8748- SmallVector<ValueList> Operands;
8749- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750- Operands.emplace_back();
8751- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8752- continue;
8753- for (Value *V : VL) {
8754- auto *CI2 = cast<CallInst>(V);
8755- Operands.back().push_back(CI2->getArgOperand(I));
8756- }
8757- TE->setOperand(I, Operands.back());
8758- }
8759- buildTree_rec(Left, Depth + 1, {TE, 0});
8760- buildTree_rec(Right, Depth + 1, {TE, 1});
8761- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8762- if (Operands[I - 2].empty())
8763- continue;
8764- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8765- }
8766- return;
8767- }
8768- TE->setOperandsInOrder();
8769- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8713+ TE->setOperand(*this, isCommutative(VL0));
8714+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
87708715 // For scalar operands no need to create an entry since no need to
87718716 // vectorize it.
87728717 if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87738718 continue;
8774- ValueList Operands;
8775- // Prepare the operand vector.
8776- for (Value *V : VL) {
8777- auto *CI2 = cast<CallInst>(V);
8778- Operands.push_back(CI2->getArgOperand(I));
8779- }
8780- buildTree_rec(Operands, Depth + 1, {TE, I});
8719+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87818720 }
87828721 return;
87838722 }
@@ -8788,43 +8727,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87888727
87898728 // Reorder operands if reordering would enable vectorization.
87908729 auto *CI = dyn_cast<CmpInst>(VL0);
8791- if (isa<BinaryOperator>(VL0) || CI) {
8730+ if (CI && any_of(VL, [](Value *V) {
8731+ return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8732+ })) {
8733+ auto *MainCI = cast<CmpInst>(S.getMainOp());
8734+ auto *AltCI = cast<CmpInst>(S.getAltOp());
8735+ CmpInst::Predicate MainP = MainCI->getPredicate();
8736+ CmpInst::Predicate AltP = AltCI->getPredicate();
8737+ assert(MainP != AltP &&
8738+ "Expected different main/alternate predicates.");
87928739 ValueList Left, Right;
8793- if (!CI || all_of(VL, [](Value *V) {
8794- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8795- })) {
8796- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8797- } else {
8798- auto *MainCI = cast<CmpInst>(S.getMainOp());
8799- auto *AltCI = cast<CmpInst>(S.getAltOp());
8800- CmpInst::Predicate MainP = MainCI->getPredicate();
8801- CmpInst::Predicate AltP = AltCI->getPredicate();
8802- assert(MainP != AltP &&
8803- "Expected different main/alternate predicates.");
8804- // Collect operands - commute if it uses the swapped predicate or
8805- // alternate operation.
8806- for (Value *V : VL) {
8807- if (isa<PoisonValue>(V)) {
8808- Left.push_back(
8809- PoisonValue::get(MainCI->getOperand(0)->getType()));
8810- Right.push_back(
8811- PoisonValue::get(MainCI->getOperand(1)->getType()));
8812- continue;
8813- }
8814- auto *Cmp = cast<CmpInst>(V);
8815- Value *LHS = Cmp->getOperand(0);
8816- Value *RHS = Cmp->getOperand(1);
8740+ // Collect operands - commute if it uses the swapped predicate or
8741+ // alternate operation.
8742+ for (Value *V : VL) {
8743+ if (isa<PoisonValue>(V)) {
8744+ Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8745+ Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8746+ continue;
8747+ }
8748+ auto *Cmp = cast<CmpInst>(V);
8749+ Value *LHS = Cmp->getOperand(0);
8750+ Value *RHS = Cmp->getOperand(1);
88178751
8818- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8819- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8820- std::swap(LHS, RHS);
8821- } else {
8822- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8823- std::swap(LHS, RHS);
8824- }
8825- Left.push_back(LHS);
8826- Right.push_back(RHS);
8752+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8753+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8754+ std::swap(LHS, RHS);
8755+ } else {
8756+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8757+ std::swap(LHS, RHS);
88278758 }
8759+ Left.push_back(LHS);
8760+ Right.push_back(RHS);
88288761 }
88298762 TE->setOperand(0, Left);
88308763 TE->setOperand(1, Right);
@@ -8833,8 +8766,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
88338766 return;
88348767 }
88358768
8836- TE->setOperandsInOrder();
8837- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8769+ TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
8770+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
88388771 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
88398772 return;
88408773 }
@@ -13539,21 +13472,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1353913472 return Cost;
1354013473}
1354113474
13542- // Perform operand reordering on the instructions in VL and return the reordered
13543- // operands in Left and Right.
13544- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13545- SmallVectorImpl<Value *> &Left,
13546- SmallVectorImpl<Value *> &Right,
13547- const BoUpSLP &R) {
13548- if (VL.empty())
13549- return;
13550- VLOperands Ops(VL, R);
13551- // Reorder the operands in place.
13552- Ops.reorder();
13553- Left = Ops.getVL(0);
13554- Right = Ops.getVL(1);
13555- }
13556-
1355713475Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1355813476 auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1355913477 if (Res)
0 commit comments