Skip to content

Commit e1ba30b

Browse files
committed
Edits
1 parent 83f8ad0 commit e1ba30b

File tree

5 files changed

+320
-116
lines changed

5 files changed

+320
-116
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6216,16 +6216,22 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
62166216
SDLoc(N), VT, N0, N1))
62176217
return SD;
62186218

6219-
// (umin (sub a, b) a) -> (usubo a, b); (select usubo.1, a, usubo.0)
6220-
{
6219+
if (TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
62216220
SDValue B;
6222-
if (sd_match(N0, m_Sub(m_Specific(N1), m_Value(B))) &&
6223-
TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
6224-
EVT SETCCT = getSetCCResultType(VT);
6225-
SDVTList VTs = DAG.getVTList(VT, SETCCT);
6221+
6222+
// (umin (sub a, b), a) -> (usubo a, b); (select usubo.1, a, usubo.0)
6223+
if (sd_match(N0, m_Sub(m_Specific(N1), m_Value(B)))) {
6224+
SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));
62266225
SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, N1, B);
62276226
return DAG.getSelect(DL, VT, USO.getValue(1), N1, USO.getValue(0));
62286227
}
6228+
6229+
// (umin a, (sub a, b)) -> (usubo a, b); (select usubo.1, a, usubo.0)
6230+
if (sd_match(N1, m_Sub(m_Specific(N0), m_Value(B)))) {
6231+
SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));
6232+
SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, N0, B);
6233+
return DAG.getSelect(DL, VT, USO.getValue(1), N0, USO.getValue(0));
6234+
}
62296235
}
62306236

62316237
// Simplify the operands using demanded-bits information.
@@ -9386,7 +9392,7 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
93869392
// Check if the byte offsets we are looking at match with either big or
93879393
// little endian value loaded. Return true for big endian, false for little
93889394
// endian, and std::nullopt if match failed.
9389-
static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9395+
static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
93909396
int64_t FirstOffset) {
93919397
// The endianness can be decided only when there are at least 2 bytes.
93929398
unsigned Width = ByteOffsets.size();
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
2+
3+
; GitHub issue #161036
4+
5+
; Positive test : umin(sub(a,b),a) with scalar types should be folded
6+
define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) {
7+
; CHECK-LABEL: underflow_compare_fold_i64
8+
; CHECK-LABEL: %bb.0:
9+
; CHECK-NEXT: subs x8, x0, x1
10+
; CHECK-NEXT: csel x0, x0, x8, lo
11+
; CHECK-NEXT: ret
12+
%sub = sub i64 %a, %b
13+
%cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
14+
ret i64 %cond
15+
}
16+
17+
; Positive test : umin(a,sub(a,b)) with scalar types should be folded
18+
define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) {
19+
; CHECK-LABEL: underflow_compare_fold_i64_commute
20+
; CHECK-LABEL: %bb.0:
21+
; CHECK-NEXT: subs x8, x0, x1
22+
; CHECK-NEXT: csel x0, x0, x8, lo
23+
; CHECK-NEXT: ret
24+
%sub = sub i64 %a, %b
25+
%cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
26+
ret i64 %cond
27+
}
28+
29+
; Positive test : multi-use is OK since the sub instruction still runs once
30+
define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
31+
; CHECK-LABEL: underflow_compare_fold_i64_multi_use
32+
; CHECK-LABEL: %bb.0:
33+
; CHECK-NEXT: subs x8, x0, x1
34+
; CHECK-NEXT: csel x0, x0, x8, lo
35+
; CHECK-NEXT: str x8, [x2]
36+
; CHECK-NEXT: ret
37+
%sub = sub i64 %a, %b
38+
store i64 %sub, ptr addrspace(1) %ptr
39+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
40+
ret i64 %cond
41+
}
42+
43+
; Positive test : umin(sub(a,b),a) with i32 should be folded
44+
define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) {
45+
; CHECK-LABEL: underflow_compare_fold_i32
46+
; CHECK-LABEL: %bb.0:
47+
; CHECK-NEXT: subs w8, w0, w1
48+
; CHECK-NEXT: csel w0, w0, w8, lo
49+
; CHECK-NEXT: ret
50+
%sub = sub i32 %a, %b
51+
%cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a)
52+
ret i32 %cond
53+
}
54+
55+
; Positive test : umin(a,sub(a,b)) with i32 should be folded
56+
define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) {
57+
; CHECK-LABEL: underflow_compare_fold_i32_commute
58+
; CHECK-LABEL: %bb.0:
59+
; CHECK-NEXT: subs w8, w0, w1
60+
; CHECK-NEXT: csel w0, w0, w8, lo
61+
; CHECK-NEXT: ret
62+
%sub = sub i32 %a, %b
63+
%cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub)
64+
ret i32 %cond
65+
}
66+
67+
; Positive test : i32, multi-use is OK since the sub instruction still runs once
68+
define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
69+
; CHECK-LABEL: underflow_compare_fold_i32_multi_use
70+
; CHECK-LABEL: %bb.0:
71+
; CHECK-NEXT: subs w8, w0, w1
72+
; CHECK-NEXT: csel w0, w0, w8, lo
73+
; CHECK-NEXT: str w8, [x2]
74+
; CHECK-NEXT: ret
75+
%sub = sub i32 %a, %b
76+
store i32 %sub, ptr addrspace(1) %ptr
77+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
78+
ret i32 %cond
79+
}
80+
81+
; Negative test : umin(sub(a,b),a) with i16 should not be folded
82+
define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) {
83+
; CHECK-LABEL: underflow_compare_fold_i16
84+
; CHECK-LABEL: %bb.0:
85+
; CHECK-LABEL: sub w8, w0, w1
86+
; CHECK-LABEL: and w9, w0, #0xffff
87+
; CHECK-LABEL: and w8, w8, #0xffff
88+
; CHECK-LABEL: cmp w8, w9
89+
; CHECK-LABEL: csel w0, w8, w9, lo
90+
; CHECK-LABEL: ret
91+
%sub = sub i16 %a, %b
92+
%cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a)
93+
ret i16 %cond
94+
}
95+
96+
; Negative test : umin(a,sub(a,b)) with i16 should not be folded
97+
define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) {
98+
; CHECK-LABEL: underflow_compare_fold_i16_commute
99+
; CHECK-LABEL: %bb.0:
100+
; CHECK-LABEL: sub w8, w0, w1
101+
; CHECK-LABEL: and w9, w0, #0xffff
102+
; CHECK-LABEL: and w8, w8, #0xffff
103+
; CHECK-LABEL: cmp w9, w8
104+
; CHECK-LABEL: csel w0, w9, w8, lo
105+
; CHECK-LABEL: ret
106+
%sub = sub i16 %a, %b
107+
%cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub)
108+
ret i16 %cond
109+
}
110+
111+
; Negative test : i16 with a multi-use sub should not be folded
112+
define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) {
113+
; CHECK-LABEL: underflow_compare_fold_i16_multi_use
114+
; CHECK-LABEL: %bb.0:
115+
; CHECK-LABEL: sub w8, w0, w1
116+
; CHECK-LABEL: and w9, w0, #0xffff
117+
; CHECK-LABEL: and w10, w8, #0xffff
118+
; CHECK-LABEL: strh w8, [x2]
119+
; CHECK-LABEL: cmp w10, w9
120+
; CHECK-LABEL: csel w0, w10, w9, lo
121+
; CHECK-LABEL: ret
122+
%sub = sub i16 %a, %b
123+
store i16 %sub, ptr addrspace(1) %ptr
124+
%cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
125+
ret i16 %cond
126+
}
127+
128+
; Negative test, vector types : umin(sub(a,b),a) but with vectors
129+
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
130+
; CHECK-LABEL: underflow_compare_dontfold_vectors
131+
; CHECK-LABEL: %bb.0
132+
; CHECK-NEXT: sub v1.16b, v0.16b, v1.16b
133+
; CHECK-NEXT: umin v0.16b, v1.16b, v0.16b
134+
; CHECK-NEXT: ret
135+
%sub = sub <16 x i8> %a, %b
136+
%cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
137+
ret <16 x i8> %cond
138+
}
139+
140+
; Negative test, pattern mismatch : umin(add(a,b),a)
141+
define i64 @umin_add(i64 %a, i64 %b) {
142+
; CHECK-LABEL: umin_add
143+
; CHECK-LABEL: %bb.0
144+
; CHECK-NEXT: add x8, x0, x1
145+
; CHECK-NEXT: cmp x8, x0
146+
; CHECK-NEXT: csel x0, x8, x0, lo
147+
; CHECK-NEXT: ret
148+
%add = add i64 %a, %b
149+
%cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
150+
ret i64 %cond
151+
}

llvm/test/CodeGen/AArch64/underflow-compare-fold.ll

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments
 (0)