Skip to content

Commit 282d182

Browse files
committed
[AArch64] Reorder Comparison Trees to Facilitate CSE
The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. The implementation before this change checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because we can then use the same instruction for the `SUB` node and the comparison. To achieve this, this commit comprises the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so), we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes #149685.
1 parent 39b54ba commit 282d182

File tree

2 files changed

+45
-29
lines changed

2 files changed

+45
-29
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3874,22 +3874,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
38743874
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
38753875
/// cannot do the negation naturally. We are required to
38763876
/// emit the subtree first in this case.
3877+
/// \param PreferFirst Set to true if processing this subtree first may
3878+
/// result in more efficient code.
38773879
/// \param WillNegate Is true if are called when the result of this
38783880
/// subexpression must be negated. This happens when the
38793881
/// outer expression is an OR. We can use this fact to know
38803882
/// that we have a double negation (or (or ...) ...) that
38813883
/// can be implemented for free.
3882-
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
3883-
bool &MustBeFirst, bool WillNegate,
3884+
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val,
3885+
bool &CanNegate, bool &MustBeFirst,
3886+
bool &PreferFirst, bool WillNegate,
38843887
unsigned Depth = 0) {
38853888
if (!Val.hasOneUse())
38863889
return false;
38873890
unsigned Opcode = Val->getOpcode();
38883891
if (Opcode == ISD::SETCC) {
3889-
if (Val->getOperand(0).getValueType() == MVT::f128)
3892+
EVT VT = Val->getOperand(0).getValueType();
3893+
if (VT == MVT::f128)
38903894
return false;
38913895
CanNegate = true;
38923896
MustBeFirst = false;
3897+
// Designate this operation as a preferred first operation if the result
3898+
// of a SUB operation can be reused.
3899+
PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
3900+
{Val->getOperand(0), Val->getOperand(1)});
38933901
return true;
38943902
}
38953903
// Protect against exponential runtime and stack overflow.
@@ -3901,11 +3909,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
39013909
SDValue O1 = Val->getOperand(1);
39023910
bool CanNegateL;
39033911
bool MustBeFirstL;
3904-
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
3912+
bool PreferFirstL;
3913+
if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
3914+
IsOR, Depth + 1))
39053915
return false;
39063916
bool CanNegateR;
39073917
bool MustBeFirstR;
3908-
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
3918+
bool PreferFirstR;
3919+
if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
3920+
IsOR, Depth + 1))
39093921
return false;
39103922

39113923
if (MustBeFirstL && MustBeFirstR)
@@ -3928,6 +3940,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
39283940
CanNegate = false;
39293941
MustBeFirst = MustBeFirstL || MustBeFirstR;
39303942
}
3943+
PreferFirst = PreferFirstL || PreferFirstR;
39313944
return true;
39323945
}
39333946
return false;
@@ -3989,19 +4002,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
39894002
SDValue LHS = Val->getOperand(0);
39904003
bool CanNegateL;
39914004
bool MustBeFirstL;
3992-
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
4005+
bool PreferFirstL;
4006+
bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
4007+
PreferFirstL, IsOR);
39934008
assert(ValidL && "Valid conjunction/disjunction tree");
39944009
(void)ValidL;
39954010

39964011
SDValue RHS = Val->getOperand(1);
39974012
bool CanNegateR;
39984013
bool MustBeFirstR;
3999-
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
4014+
bool PreferFirstR;
4015+
bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
4016+
PreferFirstR, IsOR);
40004017
assert(ValidR && "Valid conjunction/disjunction tree");
40014018
(void)ValidR;
40024019

4003-
// Swap sub-tree that must come first to the right side.
4004-
if (MustBeFirstL) {
4020+
bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
4021+
4022+
// Swap sub-tree that must or should come first to the right side.
4023+
if (MustBeFirstL || ShouldFirstL) {
40054024
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
40064025
std::swap(LHS, RHS);
40074026
std::swap(CanNegateL, CanNegateR);
@@ -4057,7 +4076,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
40574076
AArch64CC::CondCode &OutCC) {
40584077
bool DummyCanNegate;
40594078
bool DummyMustBeFirst;
4060-
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
4079+
bool DummyPreferFirst;
4080+
if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
4081+
DummyPreferFirst, false))
40614082
return SDValue();
40624083

40634084
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);

llvm/test/CodeGen/AArch64/ccmp-cse.ll

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
55
; CHECK-LABEL: test_single_or:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: cmp x2, x0
8-
; CHECK-NEXT: sub x8, x2, x1
9-
; CHECK-NEXT: ccmp x2, x1, #0, ls
10-
; CHECK-NEXT: csel x0, xzr, x8, lo
7+
; CHECK-NEXT: subs x8, x2, x1
8+
; CHECK-NEXT: ccmp x2, x0, #2, hs
9+
; CHECK-NEXT: csel x0, xzr, x8, hi
1110
; CHECK-NEXT: ret
1211
%cmp.match = icmp ult i64 %y, %x
1312
%cmp.nomatch = icmp ugt i64 %y, %unrelated
@@ -20,11 +19,10 @@ define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
2019
define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
2120
; CHECK-LABEL: test_two_ors:
2221
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmp x2, x0
24-
; CHECK-NEXT: sub x8, x2, x1
25-
; CHECK-NEXT: ccmp x2, x1, #0, ls
22+
; CHECK-NEXT: subs x8, x2, x1
2623
; CHECK-NEXT: ccmp x0, x1, #0, hs
27-
; CHECK-NEXT: csel x0, xzr, x8, lo
24+
; CHECK-NEXT: ccmp x2, x0, #2, hs
25+
; CHECK-NEXT: csel x0, xzr, x8, hi
2826
; CHECK-NEXT: ret
2927
%cmp.match = icmp ult i64 %y, %x
3028
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
@@ -39,11 +37,10 @@ define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
3937
define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
4038
; CHECK-LABEL: test_two_ors_commuted:
4139
; CHECK: // %bb.0:
42-
; CHECK-NEXT: cmp x2, x0
43-
; CHECK-NEXT: sub x8, x2, x1
44-
; CHECK-NEXT: ccmp x2, x1, #0, ls
40+
; CHECK-NEXT: subs x8, x2, x1
4541
; CHECK-NEXT: ccmp x0, x1, #0, hs
46-
; CHECK-NEXT: csel x0, xzr, x8, lo
42+
; CHECK-NEXT: ccmp x2, x0, #2, hs
43+
; CHECK-NEXT: csel x0, xzr, x8, hi
4744
; CHECK-NEXT: ret
4845
%cmp.match = icmp ult i64 %y, %x
4946
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
@@ -58,10 +55,9 @@ define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
5855
define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
5956
; CHECK-LABEL: test_single_and:
6057
; CHECK: // %bb.0:
61-
; CHECK-NEXT: cmp x2, x0
62-
; CHECK-NEXT: sub x8, x2, x1
63-
; CHECK-NEXT: ccmp x2, x1, #2, hi
64-
; CHECK-NEXT: csel x0, xzr, x8, lo
58+
; CHECK-NEXT: subs x8, x2, x1
59+
; CHECK-NEXT: ccmp x2, x0, #0, lo
60+
; CHECK-NEXT: csel x0, xzr, x8, hi
6561
; CHECK-NEXT: ret
6662
%cmp.match = icmp ult i64 %y, %x
6763
%cmp.nomatch = icmp ugt i64 %y, %unrelated
@@ -74,9 +70,8 @@ define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
7470
define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
7571
; CHECK-LABEL: test_single_or_sub_commuted:
7672
; CHECK: // %bb.0:
77-
; CHECK-NEXT: cmp x2, x0
78-
; CHECK-NEXT: sub x8, x1, x2
79-
; CHECK-NEXT: ccmp x1, x2, #2, ls
73+
; CHECK-NEXT: subs x8, x1, x2
74+
; CHECK-NEXT: ccmp x2, x0, #2, ls
8075
; CHECK-NEXT: csel x0, xzr, x8, hi
8176
; CHECK-NEXT: ret
8277
%cmp.match = icmp ult i64 %y, %x

0 commit comments

Comments
 (0)