Skip to content

Commit d4cc755

Browse files
mskampgithub-actions[bot]
authored andcommitted
Automerge: [AArch64] Reorder Comparison Trees to Facilitate CSE (#168064)
The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. The implementation before this change checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because we can then use the same instruction for the `SUB` node and the comparison. To achieve this, this commit comprises the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so), we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes #149685.
2 parents 1a07c23 + 94e9bfb commit d4cc755

File tree

2 files changed

+170
-10
lines changed

2 files changed

+170
-10
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3886,22 +3886,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
38863886
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
38873887
/// cannot do the negation naturally. We are required to
38883888
/// emit the subtree first in this case.
3889+
/// \param PreferFirst Set to true if processing this subtree first may
3890+
/// result in more efficient code.
38893891
/// \param WillNegate Is true if are called when the result of this
38903892
/// subexpression must be negated. This happens when the
38913893
/// outer expression is an OR. We can use this fact to know
38923894
/// that we have a double negation (or (or ...) ...) that
38933895
/// can be implemented for free.
3894-
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
3895-
bool &MustBeFirst, bool WillNegate,
3896+
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val,
3897+
bool &CanNegate, bool &MustBeFirst,
3898+
bool &PreferFirst, bool WillNegate,
38963899
unsigned Depth = 0) {
38973900
if (!Val.hasOneUse())
38983901
return false;
38993902
unsigned Opcode = Val->getOpcode();
39003903
if (Opcode == ISD::SETCC) {
3901-
if (Val->getOperand(0).getValueType() == MVT::f128)
3904+
EVT VT = Val->getOperand(0).getValueType();
3905+
if (VT == MVT::f128)
39023906
return false;
39033907
CanNegate = true;
39043908
MustBeFirst = false;
3909+
// Designate this operation as a preferred first operation if the result
3910+
// of a SUB operation can be reused.
3911+
PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
3912+
{Val->getOperand(0), Val->getOperand(1)});
39053913
return true;
39063914
}
39073915
// Protect against exponential runtime and stack overflow.
@@ -3913,11 +3921,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
39133921
SDValue O1 = Val->getOperand(1);
39143922
bool CanNegateL;
39153923
bool MustBeFirstL;
3916-
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
3924+
bool PreferFirstL;
3925+
if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
3926+
IsOR, Depth + 1))
39173927
return false;
39183928
bool CanNegateR;
39193929
bool MustBeFirstR;
3920-
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
3930+
bool PreferFirstR;
3931+
if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
3932+
IsOR, Depth + 1))
39213933
return false;
39223934

39233935
if (MustBeFirstL && MustBeFirstR)
@@ -3940,6 +3952,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
39403952
CanNegate = false;
39413953
MustBeFirst = MustBeFirstL || MustBeFirstR;
39423954
}
3955+
PreferFirst = PreferFirstL || PreferFirstR;
39433956
return true;
39443957
}
39453958
return false;
@@ -4001,19 +4014,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
40014014
SDValue LHS = Val->getOperand(0);
40024015
bool CanNegateL;
40034016
bool MustBeFirstL;
4004-
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
4017+
bool PreferFirstL;
4018+
bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
4019+
PreferFirstL, IsOR);
40054020
assert(ValidL && "Valid conjunction/disjunction tree");
40064021
(void)ValidL;
40074022

40084023
SDValue RHS = Val->getOperand(1);
40094024
bool CanNegateR;
40104025
bool MustBeFirstR;
4011-
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
4026+
bool PreferFirstR;
4027+
bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
4028+
PreferFirstR, IsOR);
40124029
assert(ValidR && "Valid conjunction/disjunction tree");
40134030
(void)ValidR;
40144031

4015-
// Swap sub-tree that must come first to the right side.
4016-
if (MustBeFirstL) {
4032+
bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
4033+
4034+
// Swap sub-tree that must or should come first to the right side.
4035+
if (MustBeFirstL || ShouldFirstL) {
40174036
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
40184037
std::swap(LHS, RHS);
40194038
std::swap(CanNegateL, CanNegateR);
@@ -4069,7 +4088,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
40694088
AArch64CC::CondCode &OutCC) {
40704089
bool DummyCanNegate;
40714090
bool DummyMustBeFirst;
4072-
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
4091+
bool DummyPreferFirst;
4092+
if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
4093+
DummyPreferFirst, false))
40734094
return SDValue();
40744095

40754096
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
3+
4+
define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
5+
; CHECK-LABEL: test_single_or:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: subs x8, x2, x1
8+
; CHECK-NEXT: ccmp x2, x0, #2, hs
9+
; CHECK-NEXT: csel x0, xzr, x8, hi
10+
; CHECK-NEXT: ret
11+
%cmp.match = icmp ult i64 %y, %x
12+
%cmp.nomatch = icmp ugt i64 %y, %unrelated
13+
%or.cond = or i1 %cmp.match, %cmp.nomatch
14+
%sub.reuse = sub nuw i64 %y, %x
15+
%res = select i1 %or.cond, i64 0, i64 %sub.reuse
16+
ret i64 %res
17+
}
18+
19+
define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
20+
; CHECK-LABEL: test_two_ors:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: subs x8, x2, x1
23+
; CHECK-NEXT: ccmp x0, x1, #0, hs
24+
; CHECK-NEXT: ccmp x2, x0, #2, hs
25+
; CHECK-NEXT: csel x0, xzr, x8, hi
26+
; CHECK-NEXT: ret
27+
%cmp.match = icmp ult i64 %y, %x
28+
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
29+
%cmp.nomatch2 = icmp ugt i64 %y, %unrelated
30+
%or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
31+
%or.cond = or i1 %cmp.match, %or.nomatch
32+
%sub.reuse = sub nuw i64 %y, %x
33+
%res = select i1 %or.cond, i64 0, i64 %sub.reuse
34+
ret i64 %res
35+
}
36+
37+
define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
38+
; CHECK-LABEL: test_two_ors_commuted:
39+
; CHECK: // %bb.0:
40+
; CHECK-NEXT: subs x8, x2, x1
41+
; CHECK-NEXT: ccmp x0, x1, #0, hs
42+
; CHECK-NEXT: ccmp x2, x0, #2, hs
43+
; CHECK-NEXT: csel x0, xzr, x8, hi
44+
; CHECK-NEXT: ret
45+
%cmp.match = icmp ult i64 %y, %x
46+
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
47+
%cmp.nomatch2 = icmp ugt i64 %y, %unrelated
48+
%or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
49+
%or.cond = or i1 %or.nomatch, %cmp.match
50+
%sub.reuse = sub nuw i64 %y, %x
51+
%res = select i1 %or.cond, i64 0, i64 %sub.reuse
52+
ret i64 %res
53+
}
54+
55+
define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
56+
; CHECK-LABEL: test_single_and:
57+
; CHECK: // %bb.0:
58+
; CHECK-NEXT: subs x8, x2, x1
59+
; CHECK-NEXT: ccmp x2, x0, #0, lo
60+
; CHECK-NEXT: csel x0, xzr, x8, hi
61+
; CHECK-NEXT: ret
62+
%cmp.match = icmp ult i64 %y, %x
63+
%cmp.nomatch = icmp ugt i64 %y, %unrelated
64+
%and.cond = and i1 %cmp.match, %cmp.nomatch
65+
%sub.reuse = sub nuw i64 %y, %x
66+
%res = select i1 %and.cond, i64 0, i64 %sub.reuse
67+
ret i64 %res
68+
}
69+
70+
define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
71+
; CHECK-LABEL: test_single_or_sub_commuted:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: subs x8, x1, x2
74+
; CHECK-NEXT: ccmp x2, x0, #2, ls
75+
; CHECK-NEXT: csel x0, xzr, x8, hi
76+
; CHECK-NEXT: ret
77+
%cmp.match = icmp ult i64 %y, %x
78+
%cmp.nomatch = icmp ugt i64 %y, %unrelated
79+
%or.cond = or i1 %cmp.match, %cmp.nomatch
80+
%sub.reuse = sub nuw i64 %x, %y
81+
%res = select i1 %or.cond, i64 0, i64 %sub.reuse
82+
ret i64 %res
83+
}
84+
85+
; Negative test: We must negate the or operation, hence this must come first.
86+
define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind {
87+
; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative:
88+
; CHECK: // %bb.0:
89+
; CHECK-NEXT: cmp x2, x0
90+
; CHECK-NEXT: sub x8, x2, x1
91+
; CHECK-NEXT: ccmp x0, x1, #0, ls
92+
; CHECK-NEXT: ccmp x2, x1, #2, lo
93+
; CHECK-NEXT: csel x0, xzr, x8, lo
94+
; CHECK-NEXT: ret
95+
%cmp.match = icmp ult i64 %y, %x
96+
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
97+
%cmp.nomatch2 = icmp ugt i64 %y, %unrelated
98+
%or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
99+
%and.cond = and i1 %or.nomatch, %cmp.match
100+
%sub.reuse = sub nuw i64 %y, %x
101+
%res = select i1 %and.cond, i64 0, i64 %sub.reuse
102+
ret i64 %res
103+
}
104+
105+
; Negative test: There is no analogue of SUBS for floating point.
106+
define float @test_negative_float(float %unrelated, float %x, float %y) nounwind {
107+
; CHECK-LABEL: test_negative_float:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: fcmp s2, s0
110+
; CHECK-NEXT: fsub s0, s2, s1
111+
; CHECK-NEXT: movi d3, #0000000000000000
112+
; CHECK-NEXT: fccmp s2, s1, #8, le
113+
; CHECK-NEXT: fcsel s0, s3, s0, mi
114+
; CHECK-NEXT: ret
115+
%cmp.nomatch1 = fcmp olt float %y, %x
116+
%cmp.nomatch2 = fcmp ogt float %y, %unrelated
117+
%or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2
118+
%sub.noreuse = fsub float %y, %x
119+
%res = select i1 %or.cond, float 0.0, float %sub.noreuse
120+
ret float %res
121+
}
122+
123+
; Negative test: If both operands match a sub, do not reorder them.
124+
define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind {
125+
; CHECK-LABEL: test_prefer_right_negative:
126+
; CHECK: // %bb.0:
127+
; CHECK-NEXT: cmp x2, x0
128+
; CHECK-NEXT: ccmp x2, x1, #0, ls
129+
; CHECK-NEXT: csel x8, x0, x1, lo
130+
; CHECK-NEXT: sub x0, x2, x8
131+
; CHECK-NEXT: ret
132+
%cmp.match1 = icmp ult i64 %z, %y
133+
%cmp.match2 = icmp ugt i64 %z, %x
134+
%or.cond = or i1 %cmp.match1, %cmp.match2
135+
%sub.reuse1 = sub nuw i64 %z, %y
136+
%sub.reuse2 = sub nuw i64 %z, %x
137+
%res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1
138+
ret i64 %res
139+
}

0 commit comments

Comments
 (0)