@@ -2017,6 +2017,9 @@ class BoUpSLP {
20172017
20182018 /// A vector of operand vectors.
20192019 SmallVector<OperandDataVec, 4> OpsVec;
2020+ /// Operand count reported by getNumOperands(): IntrinsicNumOperands (2) when the
2021+ /// first instruction in VL is an IntrinsicInst, else its User::getNumOperands.
2022+ unsigned ArgSize = 0;
20202023
20212024 const TargetLibraryInfo &TLI;
20222025 const DataLayout &DL;
@@ -2404,10 +2407,12 @@ class BoUpSLP {
24042407 assert(!VL.empty() && "Bad VL");
24052408 assert((empty() || VL.size() == getNumLanes()) &&
24062409 "Expected same number of lanes");
2410+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2411+ // arguments to the intrinsic produces the same result.
24072412 constexpr unsigned IntrinsicNumOperands = 2;
24082413 auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2409- unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2410- : VL0->getNumOperands() ;
2414+ unsigned NumOperands = VL0->getNumOperands();
2415+ ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands ;
24112416 OpsVec.resize(NumOperands);
24122417 unsigned NumLanes = VL.size();
24132418 for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2440,7 +2445,7 @@ class BoUpSLP {
24402445 }
24412446
24422447 /// \returns the number of operands.
2443- unsigned getNumOperands() const { return OpsVec.size() ; }
2448+ unsigned getNumOperands() const { return ArgSize ; }
24442449
24452450 /// \returns the number of lanes.
24462451 unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2617,7 +2622,8 @@ class BoUpSLP {
26172622 ArrayRef<OperandData> Op0 = OpsVec.front();
26182623 for (const OperandData &Data : Op0)
26192624 UniqueValues.insert(Data.V);
2620- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2625+ for (ArrayRef<OperandData> Op :
2626+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
26212627 if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26222628 return !UniqueValues.contains(Data.V);
26232629 }))
@@ -3138,13 +3144,6 @@ class BoUpSLP {
31383144 SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31393145 8> &GatheredLoads);
31403146
3141- /// Reorder commutative or alt operands to get better probability of
3142- /// generating vectorized code.
3143- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3144- SmallVectorImpl<Value *> &Left,
3145- SmallVectorImpl<Value *> &Right,
3146- const BoUpSLP &R);
3147-
31483147 /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31493148 /// users of \p TE and collects the stores. It returns the map from the store
31503149 /// pointers to the collected stores.
@@ -3339,27 +3338,15 @@ class BoUpSLP {
33393338 copy(OpVL, Operands[OpIdx].begin());
33403339 }
33413340
3342- /// Set the operands of this bundle in their original order.
3343- void setOperandsInOrder() {
3344- assert(Operands.empty() && "Already initialized?");
3345- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3346- Operands.resize(I0->getNumOperands());
3347- unsigned NumLanes = Scalars.size();
3348- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3349- OpIdx != NumOperands; ++OpIdx) {
3350- Operands[OpIdx].resize(NumLanes);
3351- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3352- if (isa<PoisonValue>(Scalars[Lane])) {
3353- Operands[OpIdx][Lane] =
3354- PoisonValue::get(I0->getOperand(OpIdx)->getType());
3355- continue;
3356- }
3357- auto *I = cast<Instruction>(Scalars[Lane]);
3358- assert(I->getNumOperands() == NumOperands &&
3359- "Expected same number of operands");
3360- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3361- }
3362- }
3341+ /// Set all operands of this bundle from \p VL, reordered if \p RequireReorder.
3342+ void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3343+ bool RequireReorder = false) {
3344+ VLOperands Ops(VL, R);
3345+ if (RequireReorder)
3346+ Ops.reorder();
3347+ for (unsigned I :
3348+ seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3349+ setOperand(I, Ops.getVL(I));
33633350 }
33643351
33653352 /// Reorders operands of the node to the given mask \p Mask.
@@ -8459,7 +8446,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84598446 {}, CurrentOrder);
84608447 LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84618448
8462- TE->setOperandsInOrder( );
8449+ TE->setOperand(VL, *this );
84638450 buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84648451 return;
84658452 }
@@ -8480,27 +8467,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84808467 LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84818468 else
84828469 LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8483- TE->setOperandsInOrder();
84848470 break;
84858471 case TreeEntry::StridedVectorize:
84868472 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
84878473 TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
84888474 UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8489- TE->setOperandsInOrder();
84908475 LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
84918476 break;
84928477 case TreeEntry::ScatterVectorize:
84938478 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
84948479 TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
84958480 UserTreeIdx, ReuseShuffleIndices);
8496- TE->setOperandsInOrder();
8497- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
84988481 LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
84998482 break;
85008483 case TreeEntry::CombinedVectorize:
85018484 case TreeEntry::NeedToGather:
85028485 llvm_unreachable("Unexpected loads state.");
85038486 }
8487+ TE->setOperand(VL, *this);
8488+ if (State == TreeEntry::ScatterVectorize)
8489+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85048490 return;
85058491 }
85068492 case Instruction::ZExt:
@@ -8538,8 +8524,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85388524 ReuseShuffleIndices);
85398525 LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85408526
8541- TE->setOperandsInOrder( );
8542- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8527+ TE->setOperand(VL, *this );
8528+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85438529 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85448530 if (ShuffleOrOp == Instruction::Trunc) {
85458531 ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8566,12 +8552,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85668552 LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85678553
85688554 ValueList Left, Right;
8555+ VLOperands Ops(VL, *this);
85698556 if (cast<CmpInst>(VL0)->isCommutative()) {
85708557 // Commutative predicate - collect + sort operands of the instructions
85718558 // so that each side is more likely to have the same opcode.
85728559 assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85738560 "Commutative Predicate mismatch");
8574- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8561+ Ops.reorder();
8562+ Left = Ops.getVL(0);
8563+ Right = Ops.getVL(1);
85758564 } else {
85768565 // Collect operands - commute if it uses the swapped predicate.
85778566 for (Value *V : VL) {
@@ -8632,20 +8621,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86328621 ReuseShuffleIndices);
86338622 LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86348623
8635- // Sort operands of the instructions so that each side is more likely to
8636- // have the same opcode.
8637- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8638- ValueList Left, Right;
8639- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8640- TE->setOperand(0, Left);
8641- TE->setOperand(1, Right);
8642- buildTree_rec(Left, Depth + 1, {TE, 0});
8643- buildTree_rec(Right, Depth + 1, {TE, 1});
8644- return;
8645- }
8646-
8647- TE->setOperandsInOrder();
8648- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8624+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8625+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86498626 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86508627 return;
86518628 }
@@ -8710,7 +8687,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87108687 fixupOrderingIndices(CurrentOrder);
87118688 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87128689 ReuseShuffleIndices, CurrentOrder);
8713- TE->setOperandsInOrder( );
8690+ TE->setOperand(VL, *this );
87148691 buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
87158692 if (Consecutive)
87168693 LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8726,46 +8703,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87268703
87278704 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87288705 ReuseShuffleIndices);
8729- // Sort operands of the instructions so that each side is more likely to
8730- // have the same opcode.
8731- if (isCommutative(VL0)) {
8732- ValueList Left, Right;
8733- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8734- TE->setOperand(0, Left);
8735- TE->setOperand(1, Right);
8736- SmallVector<ValueList> Operands;
8737- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8738- Operands.emplace_back();
8739- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8740- continue;
8741- for (Value *V : VL) {
8742- auto *CI2 = cast<CallInst>(V);
8743- Operands.back().push_back(CI2->getArgOperand(I));
8744- }
8745- TE->setOperand(I, Operands.back());
8746- }
8747- buildTree_rec(Left, Depth + 1, {TE, 0});
8748- buildTree_rec(Right, Depth + 1, {TE, 1});
8749- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750- if (Operands[I - 2].empty())
8751- continue;
8752- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8753- }
8754- return;
8755- }
8756- TE->setOperandsInOrder();
8757- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8706+ TE->setOperand(VL, *this, isCommutative(VL0));
8707+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
87588708 // For scalar operands no need to create an entry since no need to
87598709 // vectorize it.
87608710 if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87618711 continue;
8762- ValueList Operands;
8763- // Prepare the operand vector.
8764- for (Value *V : VL) {
8765- auto *CI2 = cast<CallInst>(V);
8766- Operands.push_back(CI2->getArgOperand(I));
8767- }
8768- buildTree_rec(Operands, Depth + 1, {TE, I});
8712+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87698713 }
87708714 return;
87718715 }
@@ -8776,43 +8720,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87768720
87778721 // Reorder operands if reordering would enable vectorization.
87788722 auto *CI = dyn_cast<CmpInst>(VL0);
8779- if (isa<BinaryOperator>(VL0) || CI) {
8723+ if (CI && any_of(VL, [](Value *V) {
8724+ return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8725+ })) {
8726+ auto *MainCI = cast<CmpInst>(S.MainOp);
8727+ auto *AltCI = cast<CmpInst>(S.AltOp);
8728+ CmpInst::Predicate MainP = MainCI->getPredicate();
8729+ CmpInst::Predicate AltP = AltCI->getPredicate();
8730+ assert(MainP != AltP &&
8731+ "Expected different main/alternate predicates.");
87808732 ValueList Left, Right;
8781- if (!CI || all_of(VL, [](Value *V) {
8782- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8783- })) {
8784- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8785- } else {
8786- auto *MainCI = cast<CmpInst>(S.MainOp);
8787- auto *AltCI = cast<CmpInst>(S.AltOp);
8788- CmpInst::Predicate MainP = MainCI->getPredicate();
8789- CmpInst::Predicate AltP = AltCI->getPredicate();
8790- assert(MainP != AltP &&
8791- "Expected different main/alternate predicates.");
8792- // Collect operands - commute if it uses the swapped predicate or
8793- // alternate operation.
8794- for (Value *V : VL) {
8795- if (isa<PoisonValue>(V)) {
8796- Left.push_back(
8797- PoisonValue::get(MainCI->getOperand(0)->getType()));
8798- Right.push_back(
8799- PoisonValue::get(MainCI->getOperand(1)->getType()));
8800- continue;
8801- }
8802- auto *Cmp = cast<CmpInst>(V);
8803- Value *LHS = Cmp->getOperand(0);
8804- Value *RHS = Cmp->getOperand(1);
8733+ // Collect operands - commute if it uses the swapped predicate or
8734+ // alternate operation.
8735+ for (Value *V : VL) {
8736+ if (isa<PoisonValue>(V)) {
8737+ Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8738+ Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8739+ continue;
8740+ }
8741+ auto *Cmp = cast<CmpInst>(V);
8742+ Value *LHS = Cmp->getOperand(0);
8743+ Value *RHS = Cmp->getOperand(1);
88058744
8806- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8807- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8808- std::swap(LHS, RHS);
8809- } else {
8810- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8811- std::swap(LHS, RHS);
8812- }
8813- Left.push_back(LHS);
8814- Right.push_back(RHS);
8745+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8746+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8747+ std::swap(LHS, RHS);
8748+ } else {
8749+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8750+ std::swap(LHS, RHS);
88158751 }
8752+ Left.push_back(LHS);
8753+ Right.push_back(RHS);
88168754 }
88178755 TE->setOperand(0, Left);
88188756 TE->setOperand(1, Right);
@@ -8821,8 +8759,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
88218759 return;
88228760 }
88238761
8824- TE->setOperandsInOrder( );
8825- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8762+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI );
8763+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
88268764 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
88278765 return;
88288766 }
@@ -13526,21 +13464,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1352613464 return Cost;
1352713465}
1352813466
13529- // Perform operand reordering on the instructions in VL and return the reordered
13530- // operands in Left and Right.
13531- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13532- SmallVectorImpl<Value *> &Left,
13533- SmallVectorImpl<Value *> &Right,
13534- const BoUpSLP &R) {
13535- if (VL.empty())
13536- return;
13537- VLOperands Ops(VL, R);
13538- // Reorder the operands in place.
13539- Ops.reorder();
13540- Left = Ops.getVL(0);
13541- Right = Ops.getVL(1);
13542- }
13543-
1354413467Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1354513468 auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1354613469 if (Res)
0 commit comments