Skip to content

Commit d80c1f5

Browse files
committed
[SLP] Sort PHIs by ExtractElementInst when relevant
Change-Id: Ife26ac762ee933cfe0b43a4b584caf08c36f0493
1 parent c210992 commit d80c1f5

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15742,7 +15742,7 @@ static bool tryToVectorizeSequence(
1574215742
// Sort by type, parent, operands.
1574315743
stable_sort(Incoming, Comparator);
1574415744

15745-
// Try to vectorize elements base on their type.
15745+
// Try to vectorize elements based on their type.
1574615746
SmallVector<T *> Candidates;
1574715747
for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) {
1574815748
// Look for the next elements with the same type, parent and operand
@@ -16014,7 +16014,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
1601416014
continue;
1601516015
return isa<UndefValue>(Opcodes2[I]);
1601616016
}
16017-
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
16017+
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I])) {
1601816018
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
1601916019
DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
1602016020
DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
@@ -16027,11 +16027,47 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
1602716027
"Different nodes should have different DFS numbers");
1602816028
if (NodeI1 != NodeI2)
1602916029
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
16030+
1603016031
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
16031-
if (S.getOpcode() && !S.isAltShuffle())
16032+
if (S.getOpcode() && !S.isAltShuffle()) {
16033+
if (!isa<ExtractElementInst>(I1) || !isa<ExtractElementInst>(I2))
16034+
continue;
16035+
16036+
auto E1 = cast<ExtractElementInst>(I1);
16037+
auto E2 = cast<ExtractElementInst>(I2);
16038+
// Sort ExtractElementInsts primarily by their vector operands, preferring the program order of
16039+
// those vector operands.
16040+
if (E1->getVectorOperand() != E2->getVectorOperand()) {
16041+
Instruction *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
16042+
Instruction *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
16043+
if (!V1 || !V2)
16044+
continue;
16045+
if (V1->getParent() != V2->getParent())
16046+
continue;
16047+
return V1->comesBefore(V2);
16048+
}
16049+
// If we have the same vector operand, try to sort by constant index
16050+
auto Id1 = E1->getIndexOperand();
16051+
auto Id2 = E2->getIndexOperand();
16052+
// Order constant indices before non-constant ones.
16053+
if (isa<ConstantInt>(Id1) && !isa<ConstantInt>(Id2))
16054+
return true;
16055+
if (!isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2))
16056+
return false;
16057+
if (isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2)) {
16058+
auto C1 = cast<ConstantInt>(Id1);
16059+
auto C2 = cast<ConstantInt>(Id2);
16060+
// Lower element indices come first.
16061+
return C1->getValue().getZExtValue() <
16062+
C2->getValue().getZExtValue();
16063+
}
16064+
1603216065
continue;
16066+
}
16067+
1603316068
return I1->getOpcode() < I2->getOpcode();
1603416069
}
16070+
}
1603516071
if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
1603616072
return Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
1603716073
if (isa<Instruction>(Opcodes1[I]))
@@ -16120,7 +16156,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
1612016156
}
1612116157
}
1612216158
}
16123-
1612416159
HaveVectorizedPhiNodes = tryToVectorizeSequence<Value>(
1612516160
Incoming, PHICompare, AreCompatiblePHIs,
1612616161
[this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,19 @@ bb1:
4949
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
5050
; CHECK-LABEL: @phis_reverse(
5151
; CHECK-NEXT: entry:
52-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
53-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
52+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
53+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
5454
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
5555
; CHECK: bb0:
56-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
57-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
56+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
57+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
5858
; CHECK-NEXT: br label [[BB1]]
5959
; CHECK: bb1:
6060
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
6161
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
62-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
63-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
62+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
63+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6565
; CHECK-NEXT: ret <4 x half> [[TMP8]]
6666
;
6767
entry:

0 commit comments

Comments
 (0)