@@ -2019,6 +2019,9 @@ class BoUpSLP {
20192019
20202020 /// A vector of operand vectors.
20212021 SmallVector<OperandDataVec, 4> OpsVec;
2022+ /// Operand count reported by getNumOperands(). When the bundle's first
2023+ /// instruction is an IntrinsicInst, ArgSize is 2 (IntrinsicNumOperands); otherwise it is User::getNumOperands.
2024+ unsigned ArgSize = 0;
20222025
20232026 const TargetLibraryInfo &TLI;
20242027 const DataLayout &DL;
@@ -2406,10 +2409,12 @@ class BoUpSLP {
24062409 assert(!VL.empty() && "Bad VL");
24072410 assert((empty() || VL.size() == getNumLanes()) &&
24082411 "Expected same number of lanes");
2412+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2413+ // arguments to the intrinsic produces the same result.
24092414 constexpr unsigned IntrinsicNumOperands = 2;
24102415 auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2411- unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2412- : VL0->getNumOperands() ;
2416+ unsigned NumOperands = VL0->getNumOperands();
2417+ ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands ;
24132418 OpsVec.resize(NumOperands);
24142419 unsigned NumLanes = VL.size();
24152420 for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2442,7 +2447,7 @@ class BoUpSLP {
24422447 }
24432448
24442449 /// \returns ArgSize: 2 for intrinsic bundles, otherwise the instruction's operand count; this may be smaller than OpsVec.size().
2445- unsigned getNumOperands() const { return OpsVec.size() ; }
2450+ unsigned getNumOperands() const { return ArgSize ; }
24462451
24472452 /// \returns the number of lanes, read from the first operand vector (so OpsVec must not be empty).
24482453 unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2623,7 +2628,8 @@ class BoUpSLP {
26232628 ArrayRef<OperandData> Op0 = OpsVec.front();
26242629 for (const OperandData &Data : Op0)
26252630 UniqueValues.insert(Data.V);
2626- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2631+ for (ArrayRef<OperandData> Op :
2632+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
26272633 if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26282634 return !UniqueValues.contains(Data.V);
26292635 }))
@@ -3144,13 +3150,6 @@ class BoUpSLP {
31443150 SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31453151 8> &GatheredLoads);
31463152
3147- /// Reorder commutative or alt operands to get better probability of
3148- /// generating vectorized code.
3149- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3150- SmallVectorImpl<Value *> &Left,
3151- SmallVectorImpl<Value *> &Right,
3152- const BoUpSLP &R);
3153-
31543153 /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31553154 /// users of \p TE and collects the stores. It returns the map from the store
31563155 /// pointers to the collected stores.
@@ -3345,27 +3344,15 @@ class BoUpSLP {
33453344 copy(OpVL, Operands[OpIdx].begin());
33463345 }
33473346
3348- /// Set the operands of this bundle in their original order.
3349- void setOperandsInOrder() {
3350- assert(Operands.empty() && "Already initialized?");
3351- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3352- Operands.resize(I0->getNumOperands());
3353- unsigned NumLanes = Scalars.size();
3354- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3355- OpIdx != NumOperands; ++OpIdx) {
3356- Operands[OpIdx].resize(NumLanes);
3357- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3358- if (isa<PoisonValue>(Scalars[Lane])) {
3359- Operands[OpIdx][Lane] =
3360- PoisonValue::get(I0->getOperand(OpIdx)->getType());
3361- continue;
3362- }
3363- auto *I = cast<Instruction>(Scalars[Lane]);
3364- assert(I->getNumOperands() == NumOperands &&
3365- "Expected same number of operands");
3366- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3367- }
3368- }
3347+ /// Set this bundle's operands from \p VL, reordering them first when \p RequireReorder is set. The operand count is taken from the first Instruction in \p VL (as the VLOperands setup does), since VL[0] need not be an instruction.
3348+ void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3349+ bool RequireReorder = false) {
3350+ VLOperands Ops(VL, R);
3351+ if (RequireReorder)
3352+ Ops.reorder();
3353+ for (unsigned I :
3354+ seq<unsigned>(cast<Instruction>(*find_if(VL, IsaPred<Instruction>))->getNumOperands()))
3355+ setOperand(I, Ops.getVL(I));
33693356 }
33703357
33713358 /// Reorders operands of the node to the given mask \p Mask.
@@ -8471,7 +8458,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84718458 {}, CurrentOrder);
84728459 LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84738460
8474- TE->setOperandsInOrder( );
8461+ TE->setOperand(VL, *this );
84758462 buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84768463 return;
84778464 }
@@ -8492,27 +8479,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84928479 LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84938480 else
84948481 LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8495- TE->setOperandsInOrder();
84968482 break;
84978483 case TreeEntry::StridedVectorize:
84988484 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
84998485 TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
85008486 UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8501- TE->setOperandsInOrder();
85028487 LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
85038488 break;
85048489 case TreeEntry::ScatterVectorize:
85058490 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
85068491 TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
85078492 UserTreeIdx, ReuseShuffleIndices);
8508- TE->setOperandsInOrder();
8509- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85108493 LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
85118494 break;
85128495 case TreeEntry::CombinedVectorize:
85138496 case TreeEntry::NeedToGather:
85148497 llvm_unreachable("Unexpected loads state.");
85158498 }
8499+ TE->setOperand(VL, *this);
8500+ if (State == TreeEntry::ScatterVectorize)
8501+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85168502 return;
85178503 }
85188504 case Instruction::ZExt:
@@ -8550,8 +8536,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85508536 ReuseShuffleIndices);
85518537 LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85528538
8553- TE->setOperandsInOrder( );
8554- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8539+ TE->setOperand(VL, *this );
8540+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85558541 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85568542 if (ShuffleOrOp == Instruction::Trunc) {
85578543 ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8578,12 +8564,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85788564 LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85798565
85808566 ValueList Left, Right;
8567+ VLOperands Ops(VL, *this);
85818568 if (cast<CmpInst>(VL0)->isCommutative()) {
85828569 // Commutative predicate - collect + sort operands of the instructions
85838570 // so that each side is more likely to have the same opcode.
85848571 assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85858572 "Commutative Predicate mismatch");
8586- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8573+ Ops.reorder();
8574+ Left = Ops.getVL(0);
8575+ Right = Ops.getVL(1);
85878576 } else {
85888577 // Collect operands - commute if it uses the swapped predicate.
85898578 for (Value *V : VL) {
@@ -8644,20 +8633,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86448633 ReuseShuffleIndices);
86458634 LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86468635
8647- // Sort operands of the instructions so that each side is more likely to
8648- // have the same opcode.
8649- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8650- ValueList Left, Right;
8651- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8652- TE->setOperand(0, Left);
8653- TE->setOperand(1, Right);
8654- buildTree_rec(Left, Depth + 1, {TE, 0});
8655- buildTree_rec(Right, Depth + 1, {TE, 1});
8656- return;
8657- }
8658-
8659- TE->setOperandsInOrder();
8660- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8636+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8637+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86618638 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86628639 return;
86638640 }
@@ -8722,7 +8699,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87228699 fixupOrderingIndices(CurrentOrder);
87238700 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87248701 ReuseShuffleIndices, CurrentOrder);
8725- TE->setOperandsInOrder( );
8702+ TE->setOperand(VL, *this );
87268703 buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
87278704 if (Consecutive)
87288705 LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8738,46 +8715,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87388715
87398716 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87408717 ReuseShuffleIndices);
8741- // Sort operands of the instructions so that each side is more likely to
8742- // have the same opcode.
8743- if (isCommutative(VL0)) {
8744- ValueList Left, Right;
8745- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8746- TE->setOperand(0, Left);
8747- TE->setOperand(1, Right);
8748- SmallVector<ValueList> Operands;
8749- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750- Operands.emplace_back();
8751- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8752- continue;
8753- for (Value *V : VL) {
8754- auto *CI2 = cast<CallInst>(V);
8755- Operands.back().push_back(CI2->getArgOperand(I));
8756- }
8757- TE->setOperand(I, Operands.back());
8758- }
8759- buildTree_rec(Left, Depth + 1, {TE, 0});
8760- buildTree_rec(Right, Depth + 1, {TE, 1});
8761- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8762- if (Operands[I - 2].empty())
8763- continue;
8764- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8765- }
8766- return;
8767- }
8768- TE->setOperandsInOrder();
8769- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8718+ TE->setOperand(VL, *this, isCommutative(VL0));
8719+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
87708720 // For scalar operands no need to create an entry since no need to
87718721 // vectorize it.
87728722 if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87738723 continue;
8774- ValueList Operands;
8775- // Prepare the operand vector.
8776- for (Value *V : VL) {
8777- auto *CI2 = cast<CallInst>(V);
8778- Operands.push_back(CI2->getArgOperand(I));
8779- }
8780- buildTree_rec(Operands, Depth + 1, {TE, I});
8724+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87818725 }
87828726 return;
87838727 }
@@ -8788,43 +8732,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87888732
87898733 // Reorder operands if reordering would enable vectorization.
87908734 auto *CI = dyn_cast<CmpInst>(VL0);
8791- if (isa<BinaryOperator>(VL0) || CI) {
8735+ if (CI && any_of(VL, [](Value *V) {
8736+ return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8737+ })) {
8738+ auto *MainCI = cast<CmpInst>(S.getMainOp());
8739+ auto *AltCI = cast<CmpInst>(S.getAltOp());
8740+ CmpInst::Predicate MainP = MainCI->getPredicate();
8741+ CmpInst::Predicate AltP = AltCI->getPredicate();
8742+ assert(MainP != AltP &&
8743+ "Expected different main/alternate predicates.");
87928744 ValueList Left, Right;
8793- if (!CI || all_of(VL, [](Value *V) {
8794- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8795- })) {
8796- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8797- } else {
8798- auto *MainCI = cast<CmpInst>(S.getMainOp());
8799- auto *AltCI = cast<CmpInst>(S.getAltOp());
8800- CmpInst::Predicate MainP = MainCI->getPredicate();
8801- CmpInst::Predicate AltP = AltCI->getPredicate();
8802- assert(MainP != AltP &&
8803- "Expected different main/alternate predicates.");
8804- // Collect operands - commute if it uses the swapped predicate or
8805- // alternate operation.
8806- for (Value *V : VL) {
8807- if (isa<PoisonValue>(V)) {
8808- Left.push_back(
8809- PoisonValue::get(MainCI->getOperand(0)->getType()));
8810- Right.push_back(
8811- PoisonValue::get(MainCI->getOperand(1)->getType()));
8812- continue;
8813- }
8814- auto *Cmp = cast<CmpInst>(V);
8815- Value *LHS = Cmp->getOperand(0);
8816- Value *RHS = Cmp->getOperand(1);
8745+ // Collect operands - commute if it uses the swapped predicate or
8746+ // alternate operation.
8747+ for (Value *V : VL) {
8748+ if (isa<PoisonValue>(V)) {
8749+ Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8750+ Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8751+ continue;
8752+ }
8753+ auto *Cmp = cast<CmpInst>(V);
8754+ Value *LHS = Cmp->getOperand(0);
8755+ Value *RHS = Cmp->getOperand(1);
88178756
8818- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8819- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8820- std::swap(LHS, RHS);
8821- } else {
8822- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8823- std::swap(LHS, RHS);
8824- }
8825- Left.push_back(LHS);
8826- Right.push_back(RHS);
8757+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8758+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8759+ std::swap(LHS, RHS);
8760+ } else {
8761+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8762+ std::swap(LHS, RHS);
88278763 }
8764+ Left.push_back(LHS);
8765+ Right.push_back(RHS);
88288766 }
88298767 TE->setOperand(0, Left);
88308768 TE->setOperand(1, Right);
@@ -8833,8 +8771,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
88338771 return;
88348772 }
88358773
8836- TE->setOperandsInOrder( );
8837- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8774+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI );
8775+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
88388776 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
88398777 return;
88408778 }
@@ -13539,21 +13477,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1353913477 return Cost;
1354013478}
1354113479
13542- // Perform operand reordering on the instructions in VL and return the reordered
13543- // operands in Left and Right.
13544- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13545- SmallVectorImpl<Value *> &Left,
13546- SmallVectorImpl<Value *> &Right,
13547- const BoUpSLP &R) {
13548- if (VL.empty())
13549- return;
13550- VLOperands Ops(VL, R);
13551- // Reorder the operands in place.
13552- Ops.reorder();
13553- Left = Ops.getVL(0);
13554- Right = Ops.getVL(1);
13555- }
13556-
1355713480Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1355813481 auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1355913482 if (Res)
0 commit comments