@@ -1949,6 +1949,9 @@ class BoUpSLP {
19491949
19501950 /// A vector of operand vectors.
19511951 SmallVector<OperandDataVec, 4> OpsVec;
1952+ /// When VL[0] is IntrinsicInst, ArgSize is 2 (its first two arguments). When
1953+ /// VL[0] is not IntrinsicInst, ArgSize is User::getNumOperands.
1954+ unsigned ArgSize;
19521955
19531956 const TargetLibraryInfo &TLI;
19541957 const DataLayout &DL;
@@ -2337,10 +2340,11 @@ class BoUpSLP {
23372340 assert((empty() || VL.size() == getNumLanes()) &&
23382341 "Expected same number of lanes");
23392342 assert(isa<Instruction>(VL[0]) && "Expected instruction");
2343+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
2344+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2345+ // arguments to the intrinsic produces the same result.
23402346 constexpr unsigned IntrinsicNumOperands = 2;
2341- unsigned NumOperands = isa<IntrinsicInst>(VL[0])
2342- ? IntrinsicNumOperands
2343- : cast<Instruction>(VL[0])->getNumOperands();
2347+ ArgSize = isa<IntrinsicInst>(VL[0]) ? IntrinsicNumOperands : NumOperands;
23442348 OpsVec.resize(NumOperands);
23452349 unsigned NumLanes = VL.size();
23462350 for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2366,7 +2370,7 @@ class BoUpSLP {
23662370 }
23672371
23682372 /// \returns the number of operands.
2369- unsigned getNumOperands() const { return OpsVec.size() ; }
2373+ unsigned getNumOperands() const { return ArgSize ; }
23702374
23712375 /// \returns the number of lanes.
23722376 unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2542,7 +2546,8 @@ class BoUpSLP {
25422546 ArrayRef<OperandData> Op0 = OpsVec.front();
25432547 for (const OperandData &Data : Op0)
25442548 UniqueValues.insert(Data.V);
2545- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2549+ for (ArrayRef<OperandData> Op : make_range(
2550+ OpsVec.begin() + 1, OpsVec.begin() + getNumOperands())) {
25462551 if (any_of(Op, [&UniqueValues](const OperandData &Data) {
25472552 return !UniqueValues.contains(Data.V);
25482553 }))
@@ -3064,13 +3069,6 @@ class BoUpSLP {
30643069 SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
30653070 8> &GatheredLoads);
30663071
3067- /// Reorder commutative or alt operands to get better probability of
3068- /// generating vectorized code.
3069- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3070- SmallVectorImpl<Value *> &Left,
3071- SmallVectorImpl<Value *> &Right,
3072- const BoUpSLP &R);
3073-
30743072 /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
30753073 /// users of \p TE and collects the stores. It returns the map from the store
30763074 /// pointers to the collected stores.
@@ -3265,22 +3263,10 @@ class BoUpSLP {
32653263 copy(OpVL, Operands[OpIdx].begin());
32663264 }
32673265
3268- /// Set the operands of this bundle in their original order.
3269- void setOperandsInOrder() {
3270- assert(Operands.empty() && "Already initialized?");
3271- auto *I0 = cast<Instruction>(Scalars[0]);
3272- Operands.resize(I0->getNumOperands());
3273- unsigned NumLanes = Scalars.size();
3274- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3275- OpIdx != NumOperands; ++OpIdx) {
3276- Operands[OpIdx].resize(NumLanes);
3277- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3278- auto *I = cast<Instruction>(Scalars[Lane]);
3279- assert(I->getNumOperands() == NumOperands &&
3280- "Expected same number of operands");
3281- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3282- }
3283- }
3266+ /// Set this bundle's first \p NumOperands operands from \p Ops.
3267+ void setOperand(const VLOperands &Ops, unsigned NumOperands) {
3268+ for (unsigned I : seq(NumOperands))
3269+ setOperand(I, Ops.getVL(I));
32843270 }
32853271
32863272 /// Reorders operands of the node to the given mask \p Mask.
@@ -8329,7 +8315,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
83298315 {}, CurrentOrder);
83308316 LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
83318317
8332- TE->setOperandsInOrder();
8318+ VLOperands Ops(VL, *this);
8319+ TE->setOperand(Ops, VL0->getNumOperands());
83338320 buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
83348321 return;
83358322 }
@@ -8350,27 +8337,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
83508337 LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
83518338 else
83528339 LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8353- TE->setOperandsInOrder();
83548340 break;
83558341 case TreeEntry::StridedVectorize:
83568342 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
83578343 TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
83588344 UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8359- TE->setOperandsInOrder();
83608345 LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
83618346 break;
83628347 case TreeEntry::ScatterVectorize:
83638348 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
83648349 TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
83658350 UserTreeIdx, ReuseShuffleIndices);
8366- TE->setOperandsInOrder();
8367- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
83688351 LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
83698352 break;
83708353 case TreeEntry::CombinedVectorize:
83718354 case TreeEntry::NeedToGather:
83728355 llvm_unreachable("Unexpected loads state.");
83738356 }
8357+ VLOperands Ops(VL, *this);
8358+ TE->setOperand(Ops, VL0->getNumOperands());
8359+ if (State == TreeEntry::ScatterVectorize)
8360+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
83748361 return;
83758362 }
83768363 case Instruction::ZExt:
@@ -8408,8 +8395,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84088395 ReuseShuffleIndices);
84098396 LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
84108397
8411- TE->setOperandsInOrder();
8412- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8398+ VLOperands Ops(VL, *this);
8399+ TE->setOperand(Ops, VL0->getNumOperands());
8400+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
84138401 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
84148402 if (ShuffleOrOp == Instruction::Trunc) {
84158403 ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8436,12 +8424,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84368424 LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
84378425
84388426 ValueList Left, Right;
8427+ VLOperands Ops(VL, *this);
84398428 if (cast<CmpInst>(VL0)->isCommutative()) {
84408429 // Commutative predicate - collect + sort operands of the instructions
84418430 // so that each side is more likely to have the same opcode.
84428431 assert(P0 == CmpInst::getSwappedPredicate(P0) &&
84438432 "Commutative Predicate mismatch");
8444- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8433+ Ops.reorder();
8434+ Left = Ops.getVL(0);
8435+ Right = Ops.getVL(1);
84458436 } else {
84468437 // Collect operands - commute if it uses the swapped predicate.
84478438 for (Value *V : VL) {
@@ -8497,20 +8488,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84978488 ReuseShuffleIndices);
84988489 LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
84998490
8491+ VLOperands Ops(VL, *this);
85008492 // Sort operands of the instructions so that each side is more likely to
85018493 // have the same opcode.
8502- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8503- ValueList Left, Right;
8504- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8505- TE->setOperand(0, Left);
8506- TE->setOperand(1, Right);
8507- buildTree_rec(Left, Depth + 1, {TE, 0});
8508- buildTree_rec(Right, Depth + 1, {TE, 1});
8509- return;
8510- }
8511-
8512- TE->setOperandsInOrder();
8513- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8494+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
8495+ Ops.reorder();
8496+ TE->setOperand(Ops, VL0->getNumOperands());
8497+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85148498 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85158499 return;
85168500 }
@@ -8575,7 +8559,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85758559 fixupOrderingIndices(CurrentOrder);
85768560 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
85778561 ReuseShuffleIndices, CurrentOrder);
8578- TE->setOperandsInOrder();
8562+ VLOperands Ops(VL, *this);
8563+ TE->setOperand(Ops, VL0->getNumOperands());
85798564 buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
85808565 if (Consecutive)
85818566 LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8591,46 +8576,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85918576
85928577 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
85938578 ReuseShuffleIndices);
8579+ VLOperands Ops(VL, *this);
85948580 // Sort operands of the instructions so that each side is more likely to
85958581 // have the same opcode.
8596- if (isCommutative(VL0)) {
8597- ValueList Left, Right;
8598- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8599- TE->setOperand(0, Left);
8600- TE->setOperand(1, Right);
8601- SmallVector<ValueList> Operands;
8602- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8603- Operands.emplace_back();
8604- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8605- continue;
8606- for (Value *V : VL) {
8607- auto *CI2 = cast<CallInst>(V);
8608- Operands.back().push_back(CI2->getArgOperand(I));
8609- }
8610- TE->setOperand(I, Operands.back());
8611- }
8612- buildTree_rec(Left, Depth + 1, {TE, 0});
8613- buildTree_rec(Right, Depth + 1, {TE, 1});
8614- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8615- if (Operands[I - 2].empty())
8616- continue;
8617- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8618- }
8619- return;
8620- }
8621- TE->setOperandsInOrder();
8622- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8582+ if (isCommutative(VL0))
8583+ Ops.reorder();
8584+ TE->setOperand(Ops, VL0->getNumOperands());
8585+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
86238586 // For scalar operands no need to create an entry since no need to
86248587 // vectorize it.
86258588 if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
86268589 continue;
8627- ValueList Operands;
8628- // Prepare the operand vector.
8629- for (Value *V : VL) {
8630- auto *CI2 = cast<CallInst>(V);
8631- Operands.push_back(CI2->getArgOperand(I));
8632- }
8633- buildTree_rec(Operands, Depth + 1, {TE, I});
8590+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86348591 }
86358592 return;
86368593 }
@@ -8639,21 +8596,22 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86398596 ReuseShuffleIndices);
86408597 LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
86418598
8599+ VLOperands Ops(VL, *this);
86428600 // Reorder operands if reordering would enable vectorization.
86438601 auto *CI = dyn_cast<CmpInst>(VL0);
86448602 if (isa<BinaryOperator>(VL0) || CI) {
8645- ValueList Left, Right;
86468603 if (!CI || all_of(VL, [](Value *V) {
86478604 return cast<CmpInst>(V)->isCommutative();
86488605 })) {
8649- reorderInputsAccordingToOpcode(VL, Left, Right, *this );
8606+ Ops.reorder( );
86508607 } else {
86518608 auto *MainCI = cast<CmpInst>(S.MainOp);
86528609 auto *AltCI = cast<CmpInst>(S.AltOp);
86538610 CmpInst::Predicate MainP = MainCI->getPredicate();
86548611 CmpInst::Predicate AltP = AltCI->getPredicate();
86558612 assert(MainP != AltP &&
86568613 "Expected different main/alternate predicates.");
8614+ ValueList Left, Right;
86578615 // Collect operands - commute if it uses the swapped predicate or
86588616 // alternate operation.
86598617 for (Value *V : VL) {
@@ -8671,16 +8629,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86718629 Left.push_back(LHS);
86728630 Right.push_back(RHS);
86738631 }
8632+ TE->setOperand(0, Left);
8633+ TE->setOperand(1, Right);
8634+ buildTree_rec(Left, Depth + 1, {TE, 0});
8635+ buildTree_rec(Right, Depth + 1, {TE, 1});
8636+ return;
86748637 }
8675- TE->setOperand(0, Left);
8676- TE->setOperand(1, Right);
8677- buildTree_rec(Left, Depth + 1, {TE, 0});
8678- buildTree_rec(Right, Depth + 1, {TE, 1});
8679- return;
86808638 }
86818639
8682- TE->setOperandsInOrder( );
8683- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8640+ TE->setOperand(Ops, VL0->getNumOperands() );
8641+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86848642 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86858643 return;
86868644 }
@@ -13300,21 +13258,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1330013258 return Cost;
1330113259}
1330213260
13303- // Perform operand reordering on the instructions in VL and return the reordered
13304- // operands in Left and Right.
13305- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13306- SmallVectorImpl<Value *> &Left,
13307- SmallVectorImpl<Value *> &Right,
13308- const BoUpSLP &R) {
13309- if (VL.empty())
13310- return;
13311- VLOperands Ops(VL, R);
13312- // Reorder the operands in place.
13313- Ops.reorder();
13314- Left = Ops.getVL(0);
13315- Right = Ops.getVL(1);
13316- }
13317-
1331813261Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1331913262 auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1332013263 if (Res)
0 commit comments