@@ -3797,6 +3797,51 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
3797
3797
return None;
3798
3798
}
3799
3799
3800
+ /// Check if two insertelement instructions are from the same buildvector. Starting from \p VU and \p V, repeatedly steps to the value returned by \p GetBaseOperand; returns true only if one walk reaches the other insert and that insert has exactly one use.
3801
+ static bool areTwoInsertFromSameBuildVector(
3802
+ InsertElementInst *VU, InsertElementInst *V,
3803
+ function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
3804
+ // Instructions must be from the same basic block.
3805
+ if (VU->getParent() != V->getParent())
3806
+ return false;
3807
+ // Both inserts must produce the same vector type.
3808
+ if (VU->getType() != V->getType())
3809
+ return false;
3810
+ // Multiple used inserts are separate nodes; reject only when neither insert has exactly one use.
3811
+ if (!VU->hasOneUse() && !V->hasOneUse())
3812
+ return false;
3813
+ auto *IE1 = VU;
3814
+ auto *IE2 = V;
3815
+ Optional<unsigned> Idx1 = getInsertIndex(IE1);
3816
+ Optional<unsigned> Idx2 = getInsertIndex(IE2);
3817
+ if (Idx1 == None || Idx2 == None) // getInsertIndex produced no index for one of the inserts.
3818
+ return false;
3819
+ // Follow GetBaseOperand from both inserts in lockstep, trying to find
3820
+ // either VU as the original vector for IE2 or V as the original vector for
3821
+ // IE1.
3822
+ do {
3823
+ if (IE2 == VU)
3824
+ return VU->hasOneUse();
3825
+ if (IE1 == V)
3826
+ return V->hasOneUse();
3827
+ if (IE1) {
3828
+ if ((IE1 != VU && !IE1->hasOneUse()) ||
3829
+ getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
3830
+ IE1 = nullptr; // Stop this walk: an intermediate insert lacks a single use, or its index is missing or equals V's index.
3831
+ else
3832
+ IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
3833
+ }
3834
+ if (IE2) {
3835
+ if ((IE2 != V && !IE2->hasOneUse()) ||
3836
+ getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
3837
+ IE2 = nullptr; // Stop this walk: an intermediate insert lacks a single use, or its index is missing or equals VU's index.
3838
+ else
3839
+ IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
3840
+ }
3841
+ } while (IE1 || IE2); // Continue until both walks have stopped.
3842
+ return false; // Neither walk reached the other insert.
3843
+ }
3844
+
3800
3845
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
3801
3846
bool TopToBottom) {
3802
3847
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -3860,6 +3905,58 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
3860
3905
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
3861
3906
!TE.isAltShuffle())
3862
3907
return TE.ReorderIndices;
3908
+ if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
3909
+ auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
3910
+ if (!V1->hasOneUse() || !V2->hasOneUse())
3911
+ return false;
3912
+ auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
3913
+ auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
3914
+ if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
3915
+ if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
3916
+ if (!areTwoInsertFromSameBuildVector(
3917
+ IE1, IE2,
3918
+ [](InsertElementInst *II) { return II->getOperand(0); }))
3919
+ return false;
3920
+ Optional<unsigned> Idx1 = getInsertIndex(IE1);
3921
+ Optional<unsigned> Idx2 = getInsertIndex(IE2);
3922
+ if (Idx1 == None || Idx2 == None)
3923
+ return false;
3924
+ return *Idx1 < *Idx2;
3925
+ }
3926
+ if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
3927
+ if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
3928
+ if (EE1->getOperand(0) != EE2->getOperand(0))
3929
+ return false;
3930
+ Optional<unsigned> Idx1 = getExtractIndex(EE1);
3931
+ Optional<unsigned> Idx2 = getExtractIndex(EE2);
3932
+ if (Idx1 == None || Idx2 == None)
3933
+ return false;
3934
+ return *Idx1 < *Idx2;
3935
+ }
3936
+ return false;
3937
+ };
3938
+ auto IsIdentityOrder = [](const OrdersType &Order) {
3939
+ for (unsigned Idx : seq<unsigned>(0, Order.size()))
3940
+ if (Idx != Order[Idx])
3941
+ return false;
3942
+ return true;
3943
+ };
3944
+ if (!TE.ReorderIndices.empty())
3945
+ return TE.ReorderIndices;
3946
+ DenseMap<Value *, unsigned> PhiToId;
3947
+ SmallVector<Value *, 4> Phis;
3948
+ OrdersType ResOrder(TE.Scalars.size());
3949
+ for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) {
3950
+ PhiToId[TE.Scalars[Id]] = Id;
3951
+ Phis.push_back(TE.Scalars[Id]);
3952
+ }
3953
+ llvm::stable_sort(Phis, PHICompare);
3954
+ for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
3955
+ ResOrder[Id] = PhiToId[Phis[Id]];
3956
+ if (IsIdentityOrder(ResOrder))
3957
+ return {};
3958
+ return ResOrder;
3959
+ }
3863
3960
if (TE.State == TreeEntry::NeedToGather) {
3864
3961
// TODO: add analysis of other gather nodes with extractelement
3865
3962
// instructions and other values/instructions, not only undefs.
@@ -3937,6 +4034,9 @@ void BoUpSLP::reorderTopToBottom() {
3937
4034
// their ordering.
3938
4035
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
3939
4036
4037
+ // Phi nodes can have a preferred ordering based on their result users.
4038
+ DenseMap<const TreeEntry *, OrdersType> PhisToOrders;
4039
+
3940
4040
// AltShuffles can also have a preferred ordering that leads to fewer
3941
4041
// instructions, e.g., the addsub instruction in x86.
3942
4042
DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
@@ -3951,7 +4051,7 @@ void BoUpSLP::reorderTopToBottom() {
3951
4051
// extracts.
3952
4052
for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
3953
4053
&GathersToOrders, &ExternalUserReorderMap,
3954
- &AltShufflesToOrders](
4054
+ &AltShufflesToOrders, &PhisToOrders ](
3955
4055
const std::unique_ptr<TreeEntry> &TE) {
3956
4056
// Look for external users that will probably be vectorized.
3957
4057
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
@@ -4008,6 +4108,9 @@ void BoUpSLP::reorderTopToBottom() {
4008
4108
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
4009
4109
if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
4010
4110
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
4111
+ if (TE->State == TreeEntry::Vectorize &&
4112
+ TE->getOpcode() == Instruction::PHI)
4113
+ PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
4011
4114
}
4012
4115
});
4013
4116
@@ -4033,8 +4136,8 @@ void BoUpSLP::reorderTopToBottom() {
4033
4136
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
4034
4137
continue;
4035
4138
// Count number of orders uses.
4036
- const auto &Order = [OpTE, &GathersToOrders,
4037
- &AltShufflesToOrders ]() -> const OrdersType & {
4139
+ const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
4140
+ &PhisToOrders ]() -> const OrdersType & {
4038
4141
if (OpTE->State == TreeEntry::NeedToGather ||
4039
4142
!OpTE->ReuseShuffleIndices.empty()) {
4040
4143
auto It = GathersToOrders.find(OpTE);
@@ -4046,6 +4149,12 @@ void BoUpSLP::reorderTopToBottom() {
4046
4149
if (It != AltShufflesToOrders.end())
4047
4150
return It->second;
4048
4151
}
4152
+ if (OpTE->State == TreeEntry::Vectorize &&
4153
+ OpTE->getOpcode() == Instruction::PHI) {
4154
+ auto It = PhisToOrders.find(OpTE);
4155
+ if (It != PhisToOrders.end())
4156
+ return It->second;
4157
+ }
4049
4158
return OpTE->ReorderIndices;
4050
4159
}();
4051
4160
// First consider the order of the external scalar users.
@@ -7140,51 +7249,6 @@ InstructionCost BoUpSLP::getSpillCost() const {
7140
7249
return Cost;
7141
7250
}
7142
7251
7143
- /// Check if two insertelement instructions are from the same buildvector. Starting from \p VU and \p V, repeatedly steps to the value returned by \p GetBaseOperand; returns true only if one walk reaches the other insert and that insert has exactly one use.
7144
- static bool areTwoInsertFromSameBuildVector(
7145
- InsertElementInst *VU, InsertElementInst *V,
7146
- function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
7147
- // Instructions must be from the same basic block.
7148
- if (VU->getParent() != V->getParent())
7149
- return false;
7150
- // Both inserts must produce the same vector type.
7151
- if (VU->getType() != V->getType())
7152
- return false;
7153
- // Multiple used inserts are separate nodes; reject only when neither insert has exactly one use.
7154
- if (!VU->hasOneUse() && !V->hasOneUse())
7155
- return false;
7156
- auto *IE1 = VU;
7157
- auto *IE2 = V;
7158
- Optional<unsigned> Idx1 = getInsertIndex(IE1);
7159
- Optional<unsigned> Idx2 = getInsertIndex(IE2);
7160
- if (Idx1 == None || Idx2 == None) // getInsertIndex produced no index for one of the inserts.
7161
- return false;
7162
- // Follow GetBaseOperand from both inserts in lockstep, trying to find
7163
- // either VU as the original vector for IE2 or V as the original vector for
7164
- // IE1.
7165
- do {
7166
- if (IE2 == VU)
7167
- return VU->hasOneUse();
7168
- if (IE1 == V)
7169
- return V->hasOneUse();
7170
- if (IE1) {
7171
- if ((IE1 != VU && !IE1->hasOneUse()) ||
7172
- getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
7173
- IE1 = nullptr; // Stop this walk: an intermediate insert lacks a single use, or its index is missing or equals V's index.
7174
- else
7175
- IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
7176
- }
7177
- if (IE2) {
7178
- if ((IE2 != V && !IE2->hasOneUse()) ||
7179
- getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
7180
- IE2 = nullptr; // Stop this walk: an intermediate insert lacks a single use, or its index is missing or equals VU's index.
7181
- else
7182
- IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
7183
- }
7184
- } while (IE1 || IE2); // Continue until both walks have stopped.
7185
- return false; // Neither walk reached the other insert.
7186
- }
7187
-
7188
7252
/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the
7189
7253
/// buildvector sequence.
7190
7254
static bool isFirstInsertElement(const InsertElementInst *IE1,
0 commit comments