Skip to content

Commit 42bce72

Browse files
committed
Reapply "[SLP] Extend reordering data of tree entry to support PHInodes".
Reapplies 87a2086 (which was reverted in 656f1d8). Fix for scalable vectors in getInsertIndex merged in 46d53f4. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D137537
1 parent d19ba74 commit 42bce72

File tree

2 files changed

+117
-53
lines changed

2 files changed

+117
-53
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 112 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3797,6 +3797,51 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
37973797
return None;
37983798
}
37993799

3800+
/// Check if two insertelement instructions are from the same buildvector.
///
/// Starting from \p VU and from \p V, repeatedly steps to the value returned
/// by \p GetBaseOperand, looking for the other instruction on either chain.
///
/// \param VU first insertelement instruction.
/// \param V second insertelement instruction.
/// \param GetBaseOperand callback giving the value to continue the walk from;
/// the walk of a chain ends when that value is null or is not an
/// InsertElementInst.
/// \returns true only if one of the two instructions is reached from the
/// other and the reached instruction has exactly one use; false otherwise.
3801+
static bool areTwoInsertFromSameBuildVector(
3802+
InsertElementInst *VU, InsertElementInst *V,
3803+
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
3804+
// Both instructions must be in the same basic block.
3805+
if (VU->getParent() != V->getParent())
3806+
return false;
3807+
// Both instructions must produce the same vector type.
3808+
if (VU->getType() != V->getType())
3809+
return false;
3810+
// Multiple used inserts are separate nodes: reject only when *both* inserts
3811+
// have more than one use (or none); one multi-use insert is still accepted
3812+
// at this point.
3811+
if (!VU->hasOneUse() && !V->hasOneUse())
3812+
return false;
3813+
// IE1 / IE2 are the cursors of the two walks; a null cursor means that walk
3814+
// has been abandoned.
3813+
auto *IE1 = VU;
3814+
auto *IE2 = V;
3815+
// Insert positions of the two starting instructions; both must be known.
3815+
Optional<unsigned> Idx1 = getInsertIndex(IE1);
3816+
Optional<unsigned> Idx2 = getInsertIndex(IE2);
3817+
if (Idx1 == None || Idx2 == None)
3818+
return false;
3819+
// Go through the vector operand of insertelement instructions trying to find
3820+
// either VU as the original vector for IE2 or V as the original vector for
3821+
// IE1.
3822+
do {
3823+
// The walk from V reached VU (this also covers VU == V on the first pass).
3823+
if (IE2 == VU)
3824+
return VU->hasOneUse();
3825+
// The walk from VU reached V.
3825+
if (IE1 == V)
3826+
return V->hasOneUse();
3827+
if (IE1) {
3828+
// Abandon this walk if an intermediate insert (anything other than the
3829+
// starting VU) does not have exactly one use, or if the current insert's
3830+
// index is unknown or equal to V's index (value_or maps "unknown" onto
3831+
// *Idx2 so both cases compare equal).
3828+
if ((IE1 != VU && !IE1->hasOneUse()) ||
3829+
getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
3830+
IE1 = nullptr;
3831+
else
3832+
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
3833+
}
3834+
if (IE2) {
3835+
// Mirror of the IE1 step: V is the exempt starting insert and the index
3836+
// is compared against VU's index.
3835+
if ((IE2 != V && !IE2->hasOneUse()) ||
3836+
getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
3837+
IE2 = nullptr;
3838+
else
3839+
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
3840+
}
3841+
} while (IE1 || IE2);
3842+
// Both walks ended without meeting the other instruction.
3842+
return false;
3843+
}
3844+
38003845
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
38013846
bool TopToBottom) {
38023847
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -3860,6 +3905,58 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
38603905
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
38613906
!TE.isAltShuffle())
38623907
return TE.ReorderIndices;
3908+
// For a vectorized PHI entry, derive an order for its scalars from the single
// user of each phi (insertelement or extractelement index).
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
3909+
// Comparator for the sort below: returns true when V1 should be placed
// before V2. Only values with exactly one use are ever ordered.
// NOTE(review): a value with several uses compares "not less" against
// everything, so equivalence may not be transitive -- confirm this is an
// acceptable comparator for stable_sort.
auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
3910+
if (!V1->hasOneUse() || !V2->hasOneUse())
3911+
return false;
3912+
// With exactly one use each, the first user is the only user.
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
3913+
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
3914+
// Case 1: both users are insertelements of the same buildvector; order by
// insert index.
if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
3915+
if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
3916+
if (!areTwoInsertFromSameBuildVector(
3917+
IE1, IE2,
3918+
[](InsertElementInst *II) { return II->getOperand(0); }))
3919+
return false;
3920+
Optional<unsigned> Idx1 = getInsertIndex(IE1);
3921+
Optional<unsigned> Idx2 = getInsertIndex(IE2);
3922+
if (Idx1 == None || Idx2 == None)
3923+
return false;
3924+
return *Idx1 < *Idx2;
3925+
}
3926+
// Case 2: both users are extractelements with the same operand 0; order by
// extract index.
if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
3927+
if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
3928+
if (EE1->getOperand(0) != EE2->getOperand(0))
3929+
return false;
3930+
Optional<unsigned> Idx1 = getExtractIndex(EE1);
3931+
Optional<unsigned> Idx2 = getExtractIndex(EE2);
3932+
if (Idx1 == None || Idx2 == None)
3933+
return false;
3934+
return *Idx1 < *Idx2;
3935+
}
3936+
// Any other combination of users (including mixed insert/extract) is left
// unordered.
return false;
3937+
};
3938+
// Returns true when Order[I] == I for every position (also true for an empty
// order).
auto IsIdentityOrder = [](const OrdersType &Order) {
3939+
for (unsigned Idx : seq<unsigned>(0, Order.size()))
3940+
if (Idx != Order[Idx])
3941+
return false;
3942+
return true;
3943+
};
3944+
// An order already recorded on the entry takes precedence.
if (!TE.ReorderIndices.empty())
3945+
return TE.ReorderIndices;
3946+
// PhiToId remembers the original position of each scalar; Phis is the list
// that gets sorted.
// NOTE(review): if TE.Scalars could contain the same value twice, the later
// position would overwrite the earlier one here -- confirm scalars are
// unique for PHI entries.
DenseMap<Value *, unsigned> PhiToId;
3947+
SmallVector<Value *, 4> Phis;
3948+
OrdersType ResOrder(TE.Scalars.size());
3949+
for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) {
3950+
PhiToId[TE.Scalars[Id]] = Id;
3951+
Phis.push_back(TE.Scalars[Id]);
3952+
}
3953+
// Stable sort, so values the comparator does not order keep their original
// relative positions.
llvm::stable_sort(Phis, PHICompare);
3954+
// ResOrder[Id] is the original position of the scalar now at sorted
// position Id.
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
3955+
ResOrder[Id] = PhiToId[Phis[Id]];
3956+
// Sorting changed nothing: return a default-constructed result instead of an
// identity order.
if (IsIdentityOrder(ResOrder))
3957+
return {};
3958+
return ResOrder;
3959+
}
38633960
if (TE.State == TreeEntry::NeedToGather) {
38643961
// TODO: add analysis of other gather nodes with extractelement
38653962
// instructions and other values/instructions, not only undefs.
@@ -3937,6 +4034,9 @@ void BoUpSLP::reorderTopToBottom() {
39374034
// their ordering.
39384035
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
39394036

4037+
// Phi nodes can have a preferred ordering based on the users of their results.
4038+
DenseMap<const TreeEntry *, OrdersType> PhisToOrders;
4039+
39404040
// AltShuffles can also have a preferred ordering that leads to fewer
39414041
// instructions, e.g., the addsub instruction in x86.
39424042
DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
@@ -3951,7 +4051,7 @@ void BoUpSLP::reorderTopToBottom() {
39514051
// extracts.
39524052
for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
39534053
&GathersToOrders, &ExternalUserReorderMap,
3954-
&AltShufflesToOrders](
4054+
&AltShufflesToOrders, &PhisToOrders](
39554055
const std::unique_ptr<TreeEntry> &TE) {
39564056
// Look for external users that will probably be vectorized.
39574057
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
@@ -4008,6 +4108,9 @@ void BoUpSLP::reorderTopToBottom() {
40084108
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
40094109
if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
40104110
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
4111+
if (TE->State == TreeEntry::Vectorize &&
4112+
TE->getOpcode() == Instruction::PHI)
4113+
PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
40114114
}
40124115
});
40134116

@@ -4033,8 +4136,8 @@ void BoUpSLP::reorderTopToBottom() {
40334136
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
40344137
continue;
40354138
// Count number of orders uses.
4036-
const auto &Order = [OpTE, &GathersToOrders,
4037-
&AltShufflesToOrders]() -> const OrdersType & {
4139+
const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
4140+
&PhisToOrders]() -> const OrdersType & {
40384141
if (OpTE->State == TreeEntry::NeedToGather ||
40394142
!OpTE->ReuseShuffleIndices.empty()) {
40404143
auto It = GathersToOrders.find(OpTE);
@@ -4046,6 +4149,12 @@ void BoUpSLP::reorderTopToBottom() {
40464149
if (It != AltShufflesToOrders.end())
40474150
return It->second;
40484151
}
4152+
if (OpTE->State == TreeEntry::Vectorize &&
4153+
OpTE->getOpcode() == Instruction::PHI) {
4154+
auto It = PhisToOrders.find(OpTE);
4155+
if (It != PhisToOrders.end())
4156+
return It->second;
4157+
}
40494158
return OpTE->ReorderIndices;
40504159
}();
40514160
// First consider the order of the external scalar users.
@@ -7140,51 +7249,6 @@ InstructionCost BoUpSLP::getSpillCost() const {
71407249
return Cost;
71417250
}
71427251

7143-
/// Check if two insertelement instructions are from the same buildvector.
7144-
static bool areTwoInsertFromSameBuildVector(
7145-
InsertElementInst *VU, InsertElementInst *V,
7146-
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
7147-
// Instructions must be from the same basic blocks.
7148-
if (VU->getParent() != V->getParent())
7149-
return false;
7150-
// Checks if 2 insertelements are from the same buildvector.
7151-
if (VU->getType() != V->getType())
7152-
return false;
7153-
// Multiple used inserts are separate nodes.
7154-
if (!VU->hasOneUse() && !V->hasOneUse())
7155-
return false;
7156-
auto *IE1 = VU;
7157-
auto *IE2 = V;
7158-
Optional<unsigned> Idx1 = getInsertIndex(IE1);
7159-
Optional<unsigned> Idx2 = getInsertIndex(IE2);
7160-
if (Idx1 == None || Idx2 == None)
7161-
return false;
7162-
// Go through the vector operand of insertelement instructions trying to find
7163-
// either VU as the original vector for IE2 or V as the original vector for
7164-
// IE1.
7165-
do {
7166-
if (IE2 == VU)
7167-
return VU->hasOneUse();
7168-
if (IE1 == V)
7169-
return V->hasOneUse();
7170-
if (IE1) {
7171-
if ((IE1 != VU && !IE1->hasOneUse()) ||
7172-
getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
7173-
IE1 = nullptr;
7174-
else
7175-
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
7176-
}
7177-
if (IE2) {
7178-
if ((IE2 != V && !IE2->hasOneUse()) ||
7179-
getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
7180-
IE2 = nullptr;
7181-
else
7182-
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
7183-
}
7184-
} while (IE1 || IE2);
7185-
return false;
7186-
}
7187-
71887252
/// Checks if the \p IE1 instruction is followed by the \p IE2 instruction in the
71897253
/// buildvector sequence.
71907254
static bool isFirstInsertElement(const InsertElementInst *IE1,

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
6363
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1
6464
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2
6565
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
66-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0
67-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1
66+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0
67+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1
6868
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0
6969
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1
7070
; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
@@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
7373
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1
7474
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2
7575
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
76-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0
77-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1
76+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0
77+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1
7878
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0
7979
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1
8080
; CHECK-NEXT: br label [[BB1:%.*]]
8181
; CHECK: bb1:
8282
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ]
8383
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ]
84-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
84+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
8585
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
8686
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
8787
; CHECK-NEXT: ret <4 x half> [[TMP12]]

0 commit comments

Comments
 (0)