Skip to content

Commit 1b7ae0b

Browse files
authored
[AArch64] Use umin for x != 0 when +cssc is enabled (llvm#169159)
Closes llvm#161584
1 parent fd19a20 commit 1b7ae0b

File tree

3 files changed

+344
-3
lines changed

3 files changed

+344
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11738,7 +11738,12 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1173811738
}
1173911739

1174011740
if (LHS.getValueType().isInteger()) {
11741-
11741+
if (Subtarget->hasCSSC() && CC == ISD::SETNE && isNullConstant(RHS)) {
11742+
SDValue One = DAG.getConstant(1, DL, LHS.getValueType());
11743+
SDValue UMin = DAG.getNode(ISD::UMIN, DL, LHS.getValueType(), LHS, One);
11744+
SDValue Res = DAG.getZExtOrTrunc(UMin, DL, VT);
11745+
return IsStrict ? DAG.getMergeValues({Res, Chain}, DL) : Res;
11746+
}
1174211747
simplifySetCCIntoEq(CC, LHS, RHS, DAG, DL);
1174311748

1174411749
SDValue CCVal;
Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
; RUN: llc -mtriple=aarch64-- -o - < %s | FileCheck %s --check-prefix=CHECK-SD
2+
; RUN: llc -mtriple=aarch64-- -mattr=+cssc -o - < %s | FileCheck %s --check-prefix=CHECK-CSSC
3+
4+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
; auto icmpi64(long x0) { return x0 != 0; }
8+
define i1 @icmpi64(i64 noundef %0) {
9+
; CHECK-SD-LABEL: icmpi64:
10+
; CHECK-SD: // %bb.0: // %entry
11+
; CHECK-SD-NEXT: cmp x0, #0
12+
; CHECK-SD-NEXT: cset w0, ne
13+
; CHECK-SD-NEXT: ret
14+
;
15+
; CHECK-CSSC-LABEL: icmpi64:
16+
; CHECK-CSSC: // %bb.0: // %entry
17+
; CHECK-CSSC-NEXT: umin x0, x0, #1
18+
; CHECK-CSSC-NEXT: // kill: def $w0 killed $w0 killed $x0
19+
; CHECK-CSSC-NEXT: ret
20+
;
21+
entry:
22+
%2 = icmp ne i64 %0, 0
23+
ret i1 %2
24+
}
25+
26+
; auto icmpi32(int x0) { return x0 != 0; }
27+
define i1 @icmpi32(i32 noundef %0) {
28+
; CHECK-SD-LABEL: icmpi32:
29+
; CHECK-SD: // %bb.0: // %entry
30+
; CHECK-SD-NEXT: cmp w0, #0
31+
; CHECK-SD-NEXT: cset w0, ne
32+
; CHECK-SD-NEXT: ret
33+
;
34+
; CHECK-CSSC-LABEL: icmpi32:
35+
; CHECK-CSSC: // %bb.0: // %entry
36+
; CHECK-CSSC-NEXT: umin w0, w0, #1
37+
; CHECK-CSSC-NEXT: ret
38+
;
39+
entry:
40+
%2 = icmp ne i32 %0, 0
41+
ret i1 %2
42+
}
43+
44+
; auto icmpi16(short x0) { return x0 != 0; }
45+
define i1 @icmpi16(i16 noundef %0) {
46+
; CHECK-SD-LABEL: icmpi16:
47+
; CHECK-SD: // %bb.0: // %entry
48+
; CHECK-SD-NEXT: tst w0, #0xffff
49+
; CHECK-SD-NEXT: cset w0, ne
50+
; CHECK-SD-NEXT: ret
51+
;
52+
; CHECK-CSSC-LABEL: icmpi16:
53+
; CHECK-CSSC: // %bb.0: // %entry
54+
; CHECK-CSSC-NEXT: and w8, w0, #0xffff
55+
; CHECK-CSSC-NEXT: umin w0, w8, #1
56+
; CHECK-CSSC-NEXT: ret
57+
;
58+
entry:
59+
%2 = icmp ne i16 %0, 0
60+
ret i1 %2
61+
}
62+
63+
; auto icmpi8(char x0) { return x0 != 0; }
64+
define i1 @icmpi8(i8 noundef %0) {
65+
; CHECK-SD-LABEL: icmpi8:
66+
; CHECK-SD: // %bb.0: // %entry
67+
; CHECK-SD-NEXT: tst w0, #0xff
68+
; CHECK-SD-NEXT: cset w0, ne
69+
; CHECK-SD-NEXT: ret
70+
;
71+
; CHECK-CSSC-LABEL: icmpi8:
72+
; CHECK-CSSC: // %bb.0: // %entry
73+
; CHECK-CSSC-NEXT: and w8, w0, #0xff
74+
; CHECK-CSSC-NEXT: umin w0, w8, #1
75+
; CHECK-CSSC-NEXT: ret
76+
;
77+
entry:
78+
%2 = icmp ne i8 %0, 0
79+
ret i1 %2
80+
}
81+
82+
; unsigned long icmpi64i8(char x0) { return x0 != 0; }
83+
define i64 @icmpi64i8(i8 noundef %0) {
84+
; CHECK-SD-LABEL: icmpi64i8:
85+
; CHECK-SD: // %bb.0: // %entry
86+
; CHECK-SD-NEXT: tst w0, #0xff
87+
; CHECK-SD-NEXT: cset w0, ne
88+
; CHECK-SD-NEXT: ret
89+
;
90+
; CHECK-CSSC-LABEL: icmpi64i8:
91+
; CHECK-CSSC: // %bb.0: // %entry
92+
; CHECK-CSSC-NEXT: and w8, w0, #0xff
93+
; CHECK-CSSC-NEXT: umin w0, w8, #1
94+
; CHECK-CSSC-NEXT: ret
95+
;
96+
entry:
97+
%1 = icmp ne i8 %0, 0
98+
%2 = zext i1 %1 to i64
99+
ret i64 %2
100+
}
101+
102+
; char setcc_i8_i64(long x0) { return x0 != 0; }
103+
define i8 @setcc_i8_i64(i64 %x) {
104+
; CHECK-SD-LABEL: setcc_i8_i64:
105+
; CHECK-SD: // %bb.0: // %entry
106+
; CHECK-SD-NEXT: cmp x0, #0
107+
; CHECK-SD-NEXT: cset w0, ne
108+
; CHECK-SD-NEXT: ret
109+
;
110+
; CHECK-CSSC-LABEL: setcc_i8_i64:
111+
; CHECK-CSSC: // %bb.0: // %entry
112+
; CHECK-CSSC-NEXT: umin x0, x0, #1
113+
; CHECK-CSSC-NEXT: // kill: def $w0 killed $w0 killed $x0
114+
; CHECK-CSSC-NEXT: ret
115+
;
116+
entry:
117+
%cmp = icmp ne i64 %x, 0
118+
%conv = zext i1 %cmp to i8
119+
ret i8 %conv
120+
}
121+
122+
; short setcc_i16_i32(int x0) { return x0 != 0; }
123+
define i16 @setcc_i16_i32(i32 %x) {
124+
; CHECK-SD-LABEL: setcc_i16_i32:
125+
; CHECK-SD: // %bb.0: // %entry
126+
; CHECK-SD-NEXT: cmp w0, #0
127+
; CHECK-SD-NEXT: cset w0, ne
128+
; CHECK-SD-NEXT: ret
129+
;
130+
; CHECK-CSSC-LABEL: setcc_i16_i32:
131+
; CHECK-CSSC: // %bb.0: // %entry
132+
; CHECK-CSSC-NEXT: umin w0, w0, #1
133+
; CHECK-CSSC-NEXT: ret
134+
;
135+
entry:
136+
%cmp = icmp ne i32 %x, 0
137+
%conv = zext i1 %cmp to i16
138+
ret i16 %conv
139+
}
140+
141+
; int setcc_i32_i64(unsigned long x0) { return x0 != 0; }
142+
define i32 @setcc_i32_i64(i64 %x) {
143+
; CHECK-SD-LABEL: setcc_i32_i64:
144+
; CHECK-SD: // %bb.0: // %entry
145+
; CHECK-SD-NEXT: cmp x0, #0
146+
; CHECK-SD-NEXT: cset w0, ne
147+
; CHECK-SD-NEXT: ret
148+
;
149+
; CHECK-CSSC-LABEL: setcc_i32_i64:
150+
; CHECK-CSSC: // %bb.0: // %entry
151+
; CHECK-CSSC-NEXT: umin x0, x0, #1
152+
; CHECK-CSSC-NEXT: // kill: def $w0 killed $w0 killed $x0
153+
; CHECK-CSSC-NEXT: ret
154+
;
155+
entry:
156+
%cmp = icmp ne i64 %x, 0
157+
%conv = zext i1 %cmp to i32
158+
ret i32 %conv
159+
}
160+
161+
; unsigned long setcc_i64_i64(unsigned long x0) { return x0 != 0; }
162+
define i64 @setcc_i64_i64(i64 %x) {
163+
; CHECK-SD-LABEL: setcc_i64_i64:
164+
; CHECK-SD: // %bb.0: // %entry
165+
; CHECK-SD-NEXT: cmp x0, #0
166+
; CHECK-SD-NEXT: cset w0, ne
167+
; CHECK-SD-NEXT: ret
168+
;
169+
; CHECK-CSSC-LABEL: setcc_i64_i64:
170+
; CHECK-CSSC: // %bb.0: // %entry
171+
; CHECK-CSSC-NEXT: umin x0, x0, #1
172+
; CHECK-CSSC-NEXT: ret
173+
;
174+
entry:
175+
%cmp = icmp ne i64 %x, 0
176+
%conv = zext i1 %cmp to i64
177+
ret i64 %conv
178+
}
179+
180+
define <2 x i1> @setcc_v2i1_v2i64(<2 x i64> %x) {
181+
; CHECK-SD-LABEL: setcc_v2i1_v2i64:
182+
; CHECK-SD: // %bb.0: // %entry
183+
; CHECK-SD-NEXT: cmtst v0.2d, v0.2d, v0.2d
184+
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
185+
; CHECK-SD-NEXT: ret
186+
;
187+
; CHECK-CSSC-LABEL: setcc_v2i1_v2i64:
188+
; CHECK-CSSC: // %bb.0: // %entry
189+
; CHECK-CSSC-NEXT: cmtst v0.2d, v0.2d, v0.2d
190+
; CHECK-CSSC-NEXT: xtn v0.2s, v0.2d
191+
; CHECK-CSSC-NEXT: ret
192+
;
193+
entry:
194+
%cmp = icmp ne <2 x i64> %x, zeroinitializer
195+
ret <2 x i1> %cmp
196+
}
197+
198+
define <4 x i1> @setcc_v4i1_v4i32(<4 x i32> %x) {
199+
; CHECK-SD-LABEL: setcc_v4i1_v4i32:
200+
; CHECK-SD: // %bb.0: // %entry
201+
; CHECK-SD-NEXT: cmtst v0.4s, v0.4s, v0.4s
202+
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
203+
; CHECK-SD-NEXT: ret
204+
;
205+
; CHECK-CSSC-LABEL: setcc_v4i1_v4i32:
206+
; CHECK-CSSC: // %bb.0: // %entry
207+
; CHECK-CSSC-NEXT: cmtst v0.4s, v0.4s, v0.4s
208+
; CHECK-CSSC-NEXT: xtn v0.4h, v0.4s
209+
; CHECK-CSSC-NEXT: ret
210+
;
211+
entry:
212+
%cmp = icmp ne <4 x i32> %x, zeroinitializer
213+
ret <4 x i1> %cmp
214+
}
215+
216+
define <8 x i1> @setcc_v8i1_v8i16(<8 x i16> %x) {
217+
; CHECK-SD-LABEL: setcc_v8i1_v8i16:
218+
; CHECK-SD: // %bb.0: // %entry
219+
; CHECK-SD-NEXT: cmtst v0.8h, v0.8h, v0.8h
220+
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
221+
; CHECK-SD-NEXT: ret
222+
;
223+
; CHECK-CSSC-LABEL: setcc_v8i1_v8i16:
224+
; CHECK-CSSC: // %bb.0: // %entry
225+
; CHECK-CSSC-NEXT: cmtst v0.8h, v0.8h, v0.8h
226+
; CHECK-CSSC-NEXT: xtn v0.8b, v0.8h
227+
; CHECK-CSSC-NEXT: ret
228+
;
229+
entry:
230+
%cmp = icmp ne <8 x i16> %x, zeroinitializer
231+
ret <8 x i1> %cmp
232+
}
233+
234+
define <16 x i1> @setcc_v16i1_v16i8(<16 x i8> %x) {
235+
; CHECK-SD-LABEL: setcc_v16i1_v16i8:
236+
; CHECK-SD: // %bb.0: // %entry
237+
; CHECK-SD-NEXT: cmtst v0.16b, v0.16b, v0.16b
238+
; CHECK-SD-NEXT: ret
239+
;
240+
; CHECK-CSSC-LABEL: setcc_v16i1_v16i8:
241+
; CHECK-CSSC: // %bb.0: // %entry
242+
; CHECK-CSSC-NEXT: cmtst v0.16b, v0.16b, v0.16b
243+
; CHECK-CSSC-NEXT: ret
244+
;
245+
entry:
246+
%cmp = icmp ne <16 x i8> %x, zeroinitializer
247+
ret <16 x i1> %cmp
248+
}
249+
250+
define <2 x i8> @setcc_v2i8_v2i64(<2 x i64> %x) {
251+
; CHECK-SD-LABEL: setcc_v2i8_v2i64:
252+
; CHECK-SD: // %bb.0: // %entry
253+
; CHECK-SD-NEXT: cmtst v0.2d, v0.2d, v0.2d
254+
; CHECK-SD-NEXT: movi v1.2s, #1
255+
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
256+
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
257+
; CHECK-SD-NEXT: ret
258+
;
259+
; CHECK-CSSC-LABEL: setcc_v2i8_v2i64:
260+
; CHECK-CSSC: // %bb.0: // %entry
261+
; CHECK-CSSC-NEXT: cmtst v0.2d, v0.2d, v0.2d
262+
; CHECK-CSSC-NEXT: movi v1.2s, #1
263+
; CHECK-CSSC-NEXT: xtn v0.2s, v0.2d
264+
; CHECK-CSSC-NEXT: and v0.8b, v0.8b, v1.8b
265+
; CHECK-CSSC-NEXT: ret
266+
;
267+
entry:
268+
%cmp = icmp ne <2 x i64> %x, zeroinitializer
269+
%conv = zext <2 x i1> %cmp to <2 x i8>
270+
ret <2 x i8> %conv
271+
}
272+
273+
define <4 x i16> @setcc_v4i16_v4i32(<4 x i32> %x) {
274+
; CHECK-SD-LABEL: setcc_v4i16_v4i32:
275+
; CHECK-SD: // %bb.0: // %entry
276+
; CHECK-SD-NEXT: cmtst v0.4s, v0.4s, v0.4s
277+
; CHECK-SD-NEXT: movi v1.4h, #1
278+
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
279+
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
280+
; CHECK-SD-NEXT: ret
281+
;
282+
; CHECK-CSSC-LABEL: setcc_v4i16_v4i32:
283+
; CHECK-CSSC: // %bb.0: // %entry
284+
; CHECK-CSSC-NEXT: cmtst v0.4s, v0.4s, v0.4s
285+
; CHECK-CSSC-NEXT: movi v1.4h, #1
286+
; CHECK-CSSC-NEXT: xtn v0.4h, v0.4s
287+
; CHECK-CSSC-NEXT: and v0.8b, v0.8b, v1.8b
288+
; CHECK-CSSC-NEXT: ret
289+
;
290+
entry:
291+
%cmp = icmp ne <4 x i32> %x, zeroinitializer
292+
%conv = zext <4 x i1> %cmp to <4 x i16>
293+
ret <4 x i16> %conv
294+
}
295+
296+
define <4 x i32> @setcc_v4i32_v4i32(<4 x i32> %x) {
297+
; CHECK-SD-LABEL: setcc_v4i32_v4i32:
298+
; CHECK-SD: // %bb.0: // %entry
299+
; CHECK-SD-NEXT: movi v1.4s, #1
300+
; CHECK-SD-NEXT: cmeq v0.4s, v0.4s, #0
301+
; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
302+
; CHECK-SD-NEXT: ret
303+
;
304+
; CHECK-CSSC-LABEL: setcc_v4i32_v4i32:
305+
; CHECK-CSSC: // %bb.0: // %entry
306+
; CHECK-CSSC-NEXT: movi v1.4s, #1
307+
; CHECK-CSSC-NEXT: cmeq v0.4s, v0.4s, #0
308+
; CHECK-CSSC-NEXT: bic v0.16b, v1.16b, v0.16b
309+
; CHECK-CSSC-NEXT: ret
310+
;
311+
entry:
312+
%cmp = icmp ne <4 x i32> %x, zeroinitializer
313+
%conv = zext <4 x i1> %cmp to <4 x i32>
314+
ret <4 x i32> %conv
315+
}
316+
317+
; auto icmpi128(__int128 x0) { return x0 != 0; }
318+
define i1 @icmpi128(i128 noundef %0) {
319+
; CHECK-SD-LABEL: icmpi128:
320+
; CHECK-SD: // %bb.0: // %entry
321+
; CHECK-SD-NEXT: orr x8, x0, x1
322+
; CHECK-SD-NEXT: cmp x8, #0
323+
; CHECK-SD-NEXT: cset w0, ne
324+
; CHECK-SD-NEXT: ret
325+
;
326+
; CHECK-CSSC-LABEL: icmpi128:
327+
; CHECK-CSSC: // %bb.0: // %entry
328+
; CHECK-CSSC-NEXT: orr x8, x0, x1
329+
; CHECK-CSSC-NEXT: umin x0, x8, #1
330+
; CHECK-CSSC-NEXT: // kill: def $w0 killed $w0 killed $x0
331+
; CHECK-CSSC-NEXT: ret
332+
;
333+
entry:
334+
%2 = icmp ne i128 %0, 0
335+
ret i1 %2
336+
}

llvm/test/CodeGen/AArch64/arm64-popcnt.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,8 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
414414
; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
415415
; CHECK-CSSC: // %bb.0: // %entry
416416
; CHECK-CSSC-NEXT: sub w8, w0, #1
417-
; CHECK-CSSC-NEXT: tst w0, w8
418-
; CHECK-CSSC-NEXT: cset w0, ne
417+
; CHECK-CSSC-NEXT: and w8, w0, w8
418+
; CHECK-CSSC-NEXT: umin w0, w8, #1
419419
; CHECK-CSSC-NEXT: ret
420420
;
421421
; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:

0 commit comments

Comments
 (0)