@@ -2017,9 +2017,6 @@ class BoUpSLP {
20172017
20182018 /// A vector of operand vectors.
20192019 SmallVector<OperandDataVec, 4> OpsVec;
2020- /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
2021- /// is not IntrinsicInst, ArgSize is User::getNumOperands.
2022- unsigned ArgSize = 0;
20232020
20242021 const TargetLibraryInfo &TLI;
20252022 const DataLayout &DL;
@@ -2407,12 +2404,10 @@ class BoUpSLP {
24072404 assert(!VL.empty() && "Bad VL");
24082405 assert((empty() || VL.size() == getNumLanes()) &&
24092406 "Expected same number of lanes");
2410- // IntrinsicInst::isCommutative returns true if swapping the first "two"
2411- // arguments to the intrinsic produces the same result.
24122407 constexpr unsigned IntrinsicNumOperands = 2;
24132408 auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2414- unsigned NumOperands = VL0->getNumOperands();
2415- ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands ;
2409+ unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2410+ : VL0->getNumOperands() ;
24162411 OpsVec.resize(NumOperands);
24172412 unsigned NumLanes = VL.size();
24182413 for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2445,7 +2440,7 @@ class BoUpSLP {
24452440 }
24462441
24472442 /// \returns the number of operands.
2448- unsigned getNumOperands() const { return ArgSize ; }
2443+ unsigned getNumOperands() const { return OpsVec.size() ; }
24492444
24502445 /// \returns the number of lanes.
24512446 unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2622,8 +2617,7 @@ class BoUpSLP {
26222617 ArrayRef<OperandData> Op0 = OpsVec.front();
26232618 for (const OperandData &Data : Op0)
26242619 UniqueValues.insert(Data.V);
2625- for (ArrayRef<OperandData> Op :
2626- ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
2620+ for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
26272621 if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26282622 return !UniqueValues.contains(Data.V);
26292623 }))
@@ -3144,6 +3138,13 @@ class BoUpSLP {
31443138 SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31453139 8> &GatheredLoads);
31463140
3141+ /// Reorder commutative or alt operands to get better probability of
3142+ /// generating vectorized code.
3143+ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3144+ SmallVectorImpl<Value *> &Left,
3145+ SmallVectorImpl<Value *> &Right,
3146+ const BoUpSLP &R);
3147+
31473148 /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31483149 /// users of \p TE and collects the stores. It returns the map from the store
31493150 /// pointers to the collected stores.
@@ -3338,15 +3339,27 @@ class BoUpSLP {
33383339 copy(OpVL, Operands[OpIdx].begin());
33393340 }
33403341
3341- /// Set this bundle's operand from \p VL.
3342- void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3343- bool RequireReorder = false) {
3344- VLOperands Ops(VL, R);
3345- if (RequireReorder)
3346- Ops.reorder();
3347- for (unsigned I :
3348- seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3349- setOperand(I, Ops.getVL(I));
3342+ /// Set the operands of this bundle in their original order.
3343+ void setOperandsInOrder() {
3344+ assert(Operands.empty() && "Already initialized?");
3345+ auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3346+ Operands.resize(I0->getNumOperands());
3347+ unsigned NumLanes = Scalars.size();
3348+ for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3349+ OpIdx != NumOperands; ++OpIdx) {
3350+ Operands[OpIdx].resize(NumLanes);
3351+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3352+ if (isa<PoisonValue>(Scalars[Lane])) {
3353+ Operands[OpIdx][Lane] =
3354+ PoisonValue::get(I0->getOperand(OpIdx)->getType());
3355+ continue;
3356+ }
3357+ auto *I = cast<Instruction>(Scalars[Lane]);
3358+ assert(I->getNumOperands() == NumOperands &&
3359+ "Expected same number of operands");
3360+ Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3361+ }
3362+ }
33503363 }
33513364
33523365 /// Reorders operands of the node to the given mask \p Mask.
@@ -8446,7 +8459,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84468459 {}, CurrentOrder);
84478460 LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84488461
8449- TE->setOperand(VL, *this );
8462+ TE->setOperandsInOrder( );
84508463 buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84518464 return;
84528465 }
@@ -8467,26 +8480,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84678480 LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84688481 else
84698482 LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8483+ TE->setOperandsInOrder();
84708484 break;
84718485 case TreeEntry::StridedVectorize:
84728486 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
84738487 TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
84748488 UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8489+ TE->setOperandsInOrder();
84758490 LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
84768491 break;
84778492 case TreeEntry::ScatterVectorize:
84788493 // Vectorizing non-consecutive loads with `llvm.masked.gather`.
84798494 TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
84808495 UserTreeIdx, ReuseShuffleIndices);
8496+ TE->setOperandsInOrder();
8497+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
84818498 LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
84828499 break;
84838500 case TreeEntry::CombinedVectorize:
84848501 case TreeEntry::NeedToGather:
84858502 llvm_unreachable("Unexpected loads state.");
84868503 }
8487- TE->setOperand(VL, *this);
8488- if (State == TreeEntry::ScatterVectorize)
8489- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
84908504 return;
84918505 }
84928506 case Instruction::ZExt:
@@ -8524,8 +8538,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85248538 ReuseShuffleIndices);
85258539 LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85268540
8527- TE->setOperand(VL, *this );
8528- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8541+ TE->setOperandsInOrder( );
8542+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
85298543 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85308544 if (ShuffleOrOp == Instruction::Trunc) {
85318545 ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8552,15 +8566,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85528566 LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85538567
85548568 ValueList Left, Right;
8555- VLOperands Ops(VL, *this);
85568569 if (cast<CmpInst>(VL0)->isCommutative()) {
85578570 // Commutative predicate - collect + sort operands of the instructions
85588571 // so that each side is more likely to have the same opcode.
85598572 assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85608573 "Commutative Predicate mismatch");
8561- Ops.reorder();
8562- Left = Ops.getVL(0);
8563- Right = Ops.getVL(1);
8574+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
85648575 } else {
85658576 // Collect operands - commute if it uses the swapped predicate.
85668577 for (Value *V : VL) {
@@ -8621,8 +8632,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86218632 ReuseShuffleIndices);
86228633 LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86238634
8624- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8625- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8635+ // Sort operands of the instructions so that each side is more likely to
8636+ // have the same opcode.
8637+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8638+ ValueList Left, Right;
8639+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8640+ TE->setOperand(0, Left);
8641+ TE->setOperand(1, Right);
8642+ buildTree_rec(Left, Depth + 1, {TE, 0});
8643+ buildTree_rec(Right, Depth + 1, {TE, 1});
8644+ return;
8645+ }
8646+
8647+ TE->setOperandsInOrder();
8648+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
86268649 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86278650 return;
86288651 }
@@ -8687,7 +8710,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86878710 fixupOrderingIndices(CurrentOrder);
86888711 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
86898712 ReuseShuffleIndices, CurrentOrder);
8690- TE->setOperand(VL, *this );
8713+ TE->setOperandsInOrder( );
86918714 buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
86928715 if (Consecutive)
86938716 LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8703,13 +8726,46 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87038726
87048727 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87058728 ReuseShuffleIndices);
8706- TE->setOperand(VL, *this, isCommutative(VL0));
8707- for (unsigned I : seq<unsigned>(CI->arg_size())) {
8729+ // Sort operands of the instructions so that each side is more likely to
8730+ // have the same opcode.
8731+ if (isCommutative(VL0)) {
8732+ ValueList Left, Right;
8733+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8734+ TE->setOperand(0, Left);
8735+ TE->setOperand(1, Right);
8736+ SmallVector<ValueList> Operands;
8737+ for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8738+ Operands.emplace_back();
8739+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8740+ continue;
8741+ for (Value *V : VL) {
8742+ auto *CI2 = cast<CallInst>(V);
8743+ Operands.back().push_back(CI2->getArgOperand(I));
8744+ }
8745+ TE->setOperand(I, Operands.back());
8746+ }
8747+ buildTree_rec(Left, Depth + 1, {TE, 0});
8748+ buildTree_rec(Right, Depth + 1, {TE, 1});
8749+ for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750+ if (Operands[I - 2].empty())
8751+ continue;
8752+ buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8753+ }
8754+ return;
8755+ }
8756+ TE->setOperandsInOrder();
8757+ for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
87088758 // For scalar operands no need to create an entry since no need to
87098759 // vectorize it.
87108760 if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87118761 continue;
8712- buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8762+ ValueList Operands;
8763+ // Prepare the operand vector.
8764+ for (Value *V : VL) {
8765+ auto *CI2 = cast<CallInst>(V);
8766+ Operands.push_back(CI2->getArgOperand(I));
8767+ }
8768+ buildTree_rec(Operands, Depth + 1, {TE, I});
87138769 }
87148770 return;
87158771 }
@@ -8720,37 +8776,43 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87208776
87218777 // Reorder operands if reordering would enable vectorization.
87228778 auto *CI = dyn_cast<CmpInst>(VL0);
8723- if (CI && any_of(VL, [](Value *V) {
8724- return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8725- })) {
8726- auto *MainCI = cast<CmpInst>(S.MainOp);
8727- auto *AltCI = cast<CmpInst>(S.AltOp);
8728- CmpInst::Predicate MainP = MainCI->getPredicate();
8729- CmpInst::Predicate AltP = AltCI->getPredicate();
8730- assert(MainP != AltP &&
8731- "Expected different main/alternate predicates.");
8779+ if (isa<BinaryOperator>(VL0) || CI) {
87328780 ValueList Left, Right;
8733- // Collect operands - commute if it uses the swapped predicate or
8734- // alternate operation.
8735- for (Value *V : VL) {
8736- if (isa<PoisonValue>(V)) {
8737- Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8738- Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8739- continue;
8740- }
8741- auto *Cmp = cast<CmpInst>(V);
8742- Value *LHS = Cmp->getOperand(0);
8743- Value *RHS = Cmp->getOperand(1);
8781+ if (!CI || all_of(VL, [](Value *V) {
8782+ return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8783+ })) {
8784+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8785+ } else {
8786+ auto *MainCI = cast<CmpInst>(S.MainOp);
8787+ auto *AltCI = cast<CmpInst>(S.AltOp);
8788+ CmpInst::Predicate MainP = MainCI->getPredicate();
8789+ CmpInst::Predicate AltP = AltCI->getPredicate();
8790+ assert(MainP != AltP &&
8791+ "Expected different main/alternate predicates.");
8792+ // Collect operands - commute if it uses the swapped predicate or
8793+ // alternate operation.
8794+ for (Value *V : VL) {
8795+ if (isa<PoisonValue>(V)) {
8796+ Left.push_back(
8797+ PoisonValue::get(MainCI->getOperand(0)->getType()));
8798+ Right.push_back(
8799+ PoisonValue::get(MainCI->getOperand(1)->getType()));
8800+ continue;
8801+ }
8802+ auto *Cmp = cast<CmpInst>(V);
8803+ Value *LHS = Cmp->getOperand(0);
8804+ Value *RHS = Cmp->getOperand(1);
87448805
8745- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8746- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8747- std::swap(LHS, RHS);
8748- } else {
8749- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8750- std::swap(LHS, RHS);
8806+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8807+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8808+ std::swap(LHS, RHS);
8809+ } else {
8810+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8811+ std::swap(LHS, RHS);
8812+ }
8813+ Left.push_back(LHS);
8814+ Right.push_back(RHS);
87518815 }
8752- Left.push_back(LHS);
8753- Right.push_back(RHS);
87548816 }
87558817 TE->setOperand(0, Left);
87568818 TE->setOperand(1, Right);
@@ -8759,8 +8821,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87598821 return;
87608822 }
87618823
8762- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI );
8763- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8824+ TE->setOperandsInOrder( );
8825+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
87648826 buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87658827 return;
87668828 }
@@ -13465,6 +13527,21 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1346513527 return Cost;
1346613528}
1346713529
13530+ // Perform operand reordering on the instructions in VL and return the reordered
13531+ // operands in Left and Right.
13532+ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13533+ SmallVectorImpl<Value *> &Left,
13534+ SmallVectorImpl<Value *> &Right,
13535+ const BoUpSLP &R) {
13536+ if (VL.empty())
13537+ return;
13538+ VLOperands Ops(VL, R);
13539+ // Reorder the operands in place.
13540+ Ops.reorder();
13541+ Left = Ops.getVL(0);
13542+ Right = Ops.getVL(1);
13543+ }
13544+
1346813545Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1346913546 auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1347013547 if (Res)
0 commit comments