diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 33657c26356d6..7ea039e04ca72 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2017,6 +2017,9 @@ class BoUpSLP { /// A vector of operand vectors. SmallVector OpsVec; + /// When VL[0] is IntrinsicInst, ArgSize is IntrinsicNumOperands (2). When + /// VL[0] is not IntrinsicInst, ArgSize is User::getNumOperands. + unsigned ArgSize = 0; const TargetLibraryInfo &TLI; const DataLayout &DL; @@ -2404,10 +2407,12 @@ class BoUpSLP { assert(!VL.empty() && "Bad VL"); assert((empty() || VL.size() == getNumLanes()) && "Expected same number of lanes"); + // IntrinsicInst::isCommutative returns true if swapping the first "two" + // arguments to the intrinsic produces the same result. constexpr unsigned IntrinsicNumOperands = 2; auto *VL0 = cast(*find_if(VL, IsaPred)); - unsigned NumOperands = isa(VL0) ? IntrinsicNumOperands - : VL0->getNumOperands(); + unsigned NumOperands = VL0->getNumOperands(); + ArgSize = isa(VL0) ? IntrinsicNumOperands : NumOperands; OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { @@ -2440,7 +2445,7 @@ class BoUpSLP { } /// \returns the number of operands. - unsigned getNumOperands() const { return OpsVec.size(); } + unsigned getNumOperands() const { return ArgSize; } /// \returns the number of lanes. 
unsigned getNumLanes() const { return OpsVec[0].size(); } @@ -2617,7 +2622,8 @@ class BoUpSLP { ArrayRef Op0 = OpsVec.front(); for (const OperandData &Data : Op0) UniqueValues.insert(Data.V); - for (ArrayRef Op : drop_begin(OpsVec, 1)) { + for (ArrayRef Op : + ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) { if (any_of(Op, [&UniqueValues](const OperandData &Data) { return !UniqueValues.contains(Data.V); })) @@ -3138,13 +3144,6 @@ class BoUpSLP { SmallVector>>, 8> &GatheredLoads); - /// Reorder commutative or alt operands to get better probability of - /// generating vectorized code. - static void reorderInputsAccordingToOpcode(ArrayRef VL, - SmallVectorImpl &Left, - SmallVectorImpl &Right, - const BoUpSLP &R); - /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the /// users of \p TE and collects the stores. It returns the map from the store /// pointers to the collected stores. @@ -3339,27 +3338,15 @@ class BoUpSLP { copy(OpVL, Operands[OpIdx].begin()); } - /// Set the operands of this bundle in their original order. - void setOperandsInOrder() { - assert(Operands.empty() && "Already initialized?"); - auto *I0 = cast(*find_if(Scalars, IsaPred)); - Operands.resize(I0->getNumOperands()); - unsigned NumLanes = Scalars.size(); - for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands(); - OpIdx != NumOperands; ++OpIdx) { - Operands[OpIdx].resize(NumLanes); - for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { - if (isa(Scalars[Lane])) { - Operands[OpIdx][Lane] = - PoisonValue::get(I0->getOperand(OpIdx)->getType()); - continue; - } - auto *I = cast(Scalars[Lane]); - assert(I->getNumOperands() == NumOperands && - "Expected same number of operands"); - Operands[OpIdx][Lane] = I->getOperand(OpIdx); - } - } + /// Set this bundle's operand from \p VL. 
+ void setOperand(ArrayRef VL, const BoUpSLP &R, + bool RequireReorder = false) { + VLOperands Ops(VL, R); + if (RequireReorder) + Ops.reorder(); + for (unsigned I : + seq(cast(VL[0])->getNumOperands())) + setOperand(I, Ops.getVL(I)); } /// Reorders operands of the node to the given mask \p Mask. @@ -8459,7 +8446,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, {}, CurrentOrder); LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n"); - TE->setOperandsInOrder(); + TE->setOperand(VL, *this); buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1}); return; } @@ -8480,27 +8467,26 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); else LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); - TE->setOperandsInOrder(); break; case TreeEntry::StridedVectorize: // Vectorizing non-consecutive loads with `llvm.masked.gather`. TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S, UserTreeIdx, ReuseShuffleIndices, CurrentOrder); - TE->setOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n"); break; case TreeEntry::ScatterVectorize: // Vectorizing non-consecutive loads with `llvm.masked.gather`. 
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S, UserTreeIdx, ReuseShuffleIndices); - TE->setOperandsInOrder(); - buildTree_rec(PointerOps, Depth + 1, {TE, 0}); LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n"); break; case TreeEntry::CombinedVectorize: case TreeEntry::NeedToGather: llvm_unreachable("Unexpected loads state."); } + TE->setOperand(VL, *this); + if (State == TreeEntry::ScatterVectorize) + buildTree_rec(PointerOps, Depth + 1, {TE, 0}); return; } case Instruction::ZExt: @@ -8538,8 +8524,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, ReuseShuffleIndices); LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); - TE->setOperandsInOrder(); - for (unsigned I : seq(0, VL0->getNumOperands())) + TE->setOperand(VL, *this); + for (unsigned I : seq(VL0->getNumOperands())) buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I}); if (ShuffleOrOp == Instruction::Trunc) { ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx); @@ -8566,12 +8552,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; + VLOperands Ops(VL, *this); if (cast(VL0)->isCommutative()) { // Commutative predicate - collect + sort operands of the instructions // so that each side is more likely to have the same opcode. assert(P0 == CmpInst::getSwappedPredicate(P0) && "Commutative Predicate mismatch"); - reorderInputsAccordingToOpcode(VL, Left, Right, *this); + Ops.reorder(); + Left = Ops.getVL(0); + Right = Ops.getVL(1); } else { // Collect operands - commute if it uses the swapped predicate. for (Value *V : VL) { @@ -8632,20 +8621,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, ReuseShuffleIndices); LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n"); - // Sort operands of the instructions so that each side is more likely to - // have the same opcode. 
- if (isa(VL0) && isCommutative(VL0)) { - ValueList Left, Right; - reorderInputsAccordingToOpcode(VL, Left, Right, *this); - TE->setOperand(0, Left); - TE->setOperand(1, Right); - buildTree_rec(Left, Depth + 1, {TE, 0}); - buildTree_rec(Right, Depth + 1, {TE, 1}); - return; - } - - TE->setOperandsInOrder(); - for (unsigned I : seq(0, VL0->getNumOperands())) + TE->setOperand(VL, *this, isa(VL0) && isCommutative(VL0)); + for (unsigned I : seq(VL0->getNumOperands())) buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I}); return; } @@ -8710,7 +8687,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, fixupOrderingIndices(CurrentOrder); TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndices, CurrentOrder); - TE->setOperandsInOrder(); + TE->setOperand(VL, *this); buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0}); if (Consecutive) LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); @@ -8726,46 +8703,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndices); - // Sort operands of the instructions so that each side is more likely to - // have the same opcode. 
- if (isCommutative(VL0)) { - ValueList Left, Right; - reorderInputsAccordingToOpcode(VL, Left, Right, *this); - TE->setOperand(0, Left); - TE->setOperand(1, Right); - SmallVector Operands; - for (unsigned I : seq(2, CI->arg_size())) { - Operands.emplace_back(); - if (isVectorIntrinsicWithScalarOpAtArg(ID, I)) - continue; - for (Value *V : VL) { - auto *CI2 = cast(V); - Operands.back().push_back(CI2->getArgOperand(I)); - } - TE->setOperand(I, Operands.back()); - } - buildTree_rec(Left, Depth + 1, {TE, 0}); - buildTree_rec(Right, Depth + 1, {TE, 1}); - for (unsigned I : seq(2, CI->arg_size())) { - if (Operands[I - 2].empty()) - continue; - buildTree_rec(Operands[I - 2], Depth + 1, {TE, I}); - } - return; - } - TE->setOperandsInOrder(); - for (unsigned I : seq(0, CI->arg_size())) { + TE->setOperand(VL, *this, isCommutative(VL0)); + for (unsigned I : seq(CI->arg_size())) { // For scalar operands no need to create an entry since no need to // vectorize it. if (isVectorIntrinsicWithScalarOpAtArg(ID, I)) continue; - ValueList Operands; - // Prepare the operand vector. - for (Value *V : VL) { - auto *CI2 = cast(V); - Operands.push_back(CI2->getArgOperand(I)); - } - buildTree_rec(Operands, Depth + 1, {TE, I}); + buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I}); } return; } @@ -8776,43 +8720,37 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Reorder operands if reordering would enable vectorization. 
auto *CI = dyn_cast(VL0); - if (isa(VL0) || CI) { + if (CI && any_of(VL, [](Value *V) { + return !isa(V) && !cast(V)->isCommutative(); + })) { + auto *MainCI = cast(S.MainOp); + auto *AltCI = cast(S.AltOp); + CmpInst::Predicate MainP = MainCI->getPredicate(); + CmpInst::Predicate AltP = AltCI->getPredicate(); + assert(MainP != AltP && + "Expected different main/alternate predicates."); ValueList Left, Right; - if (!CI || all_of(VL, [](Value *V) { - return isa(V) || cast(V)->isCommutative(); - })) { - reorderInputsAccordingToOpcode(VL, Left, Right, *this); - } else { - auto *MainCI = cast(S.MainOp); - auto *AltCI = cast(S.AltOp); - CmpInst::Predicate MainP = MainCI->getPredicate(); - CmpInst::Predicate AltP = AltCI->getPredicate(); - assert(MainP != AltP && - "Expected different main/alternate predicates."); - // Collect operands - commute if it uses the swapped predicate or - // alternate operation. - for (Value *V : VL) { - if (isa(V)) { - Left.push_back( - PoisonValue::get(MainCI->getOperand(0)->getType())); - Right.push_back( - PoisonValue::get(MainCI->getOperand(1)->getType())); - continue; - } - auto *Cmp = cast(V); - Value *LHS = Cmp->getOperand(0); - Value *RHS = Cmp->getOperand(1); + // Collect operands - commute if it uses the swapped predicate or + // alternate operation. 
+ for (Value *V : VL) { + if (isa(V)) { + Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType())); + Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType())); + continue; + } + auto *Cmp = cast(V); + Value *LHS = Cmp->getOperand(0); + Value *RHS = Cmp->getOperand(1); - if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) { - if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate())) - std::swap(LHS, RHS); - } else { - if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate())) - std::swap(LHS, RHS); - } - Left.push_back(LHS); - Right.push_back(RHS); + if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) { + if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate())) + std::swap(LHS, RHS); + } else { + if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate())) + std::swap(LHS, RHS); } + Left.push_back(LHS); + Right.push_back(RHS); } TE->setOperand(0, Left); TE->setOperand(1, Right); @@ -8821,8 +8759,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, return; } - TE->setOperandsInOrder(); - for (unsigned I : seq(0, VL0->getNumOperands())) + TE->setOperand(VL, *this, isa(VL0) || CI); + for (unsigned I : seq(VL0->getNumOperands())) buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I}); return; } @@ -13526,21 +13464,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef VL, bool ForPoisonSrc, return Cost; } -// Perform operand reordering on the instructions in VL and return the reordered -// operands in Left and Right. -void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef VL, - SmallVectorImpl &Left, - SmallVectorImpl &Right, - const BoUpSLP &R) { - if (VL.empty()) - return; - VLOperands Ops(VL, R); - // Reorder the operands in place. - Ops.reorder(); - Left = Ops.getVL(0); - Right = Ops.getVL(1); -} - Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { auto &Res = EntryToLastInstruction.try_emplace(E).first->second; if (Res)