Skip to content

Commit 939a425

Browse files
committed
Pre-commit test for setcc removal by using add/sub carryout
Signed-off-by: John Lu <[email protected]>
1 parent 5088795 commit 939a425

File tree

1 file changed

+186
-0
lines changed

1 file changed

+186
-0
lines changed
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
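;; Test that carryout from 64-bit add/sub (synthesized from two 32-bit adds/subs) is utilized
3+
;; (i.e. no additional compare is generated). This is a pre-commit baseline: the autogenerated
;; assertions below still contain the separate v_cmp_*_u64 compare after each add/sub pair.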
4+
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
6+
7+
;; Type declarations. %struct.uint96 (an i64 plus an i32) is the return type of @add64_32.
;; NOTE(review): %0, %1 and %struct.uint64pair are not referenced by any function visible
;; in this file -- confirm whether they are leftovers that can be dropped.
%0 = type { i64, i64, i32, i32 }
8+
%1 = type { [64 x [8 x i64]] }
9+
%struct.uint96 = type { i64, i32 }
10+
%struct.uint64pair = type { i64, i64 }
11+
12+
;; Scalar i64 unsigned add/sub-with-overflow intrinsics; each returns {result, overflow bit}.
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
13+
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
14+
15+
;; <2 x i64> vector forms of the same intrinsics; the overflow result is a <2 x i1> vector.
declare {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
16+
declare {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
17+
18+
;; 96-bit style add written without intrinsics: the carry out of the 64-bit add is
;; recovered with an explicit unsigned compare (sum < first operand), zero-extended and
;; added into the i32 value. Returns %struct.uint96 { 64-bit sum, i32 value plus carry }.
;; The current output recomputes the carry with v_cmp_lt_u64 after the add/addc pair.
define hidden %struct.uint96 @add64_32(i64 %val64A, i64 %val64B, i32 %val32) {
19+
; CHECK-LABEL: add64_32:
20+
; CHECK: ; %bb.0:
21+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22+
; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2
23+
; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v3, vcc
24+
; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[5:6], v[0:1]
25+
; CHECK-NEXT: v_mov_b32_e32 v0, v5
26+
; CHECK-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v4, vcc
27+
; CHECK-NEXT: v_mov_b32_e32 v1, v6
28+
; CHECK-NEXT: s_setpc_b64 s[30:31]
29+
  %sum64 = add i64 %val64A, %val64B
30+
;; Unsigned wraparound check: the sum is below an operand exactly when the add carried out.
  %obit = icmp ult i64 %sum64, %val64A
31+
  %obit32 = zext i1 %obit to i32
32+
  %sum32 = add i32 %val32, %obit32
33+
  %.fca.0.insert = insertvalue %struct.uint96 poison, i64 %sum64, 0
34+
  %.fca.1.insert = insertvalue %struct.uint96 %.fca.0.insert, i32 %sum32, 1
35+
  ret %struct.uint96 %.fca.1.insert
36+
}
37+
38+
;; Vector unsigned add with overflow: stores the <2 x i64> sum to %ptrval and returns the
;; per-lane overflow bits sign-extended to i64 (all-ones on overflow, zero otherwise).
;; The current output has one v_cmp_lt_u64 per lane after the add/addc pairs.
define <2 x i64> @uadd_v2i64(<2 x i64> %val0, <2 x i64> %val1, ptr %ptrval) {
39+
; CHECK-LABEL: uadd_v2i64:
40+
; CHECK: ; %bb.0:
41+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42+
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v2, v6
43+
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
44+
; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, v0, v4
45+
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v5, vcc
46+
; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1]
47+
; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
48+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
49+
; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[6:7], v[2:3]
50+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
51+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
52+
; CHECK-NEXT: v_mov_b32_e32 v3, v2
53+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
54+
; CHECK-NEXT: s_setpc_b64 s[30:31]
55+
  %pair = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> %val0, <2 x i64> %val1)
56+
  %val = extractvalue {<2 x i64>, <2 x i1>} %pair, 0
57+
  %obit = extractvalue {<2 x i64>, <2 x i1>} %pair, 1
58+
  %res = sext <2 x i1> %obit to <2 x i64>
59+
  store <2 x i64> %val, ptr %ptrval
60+
  ret <2 x i64> %res
61+
}
62+
63+
;; Vector unsigned subtract with overflow: stores the <2 x i64> difference to %ptrval and
;; returns the per-lane overflow (borrow) bits sign-extended to i64.
;; The current output has one v_cmp_gt_u64 per lane after the sub/subb pairs.
define <2 x i64> @usub_v2i64(<2 x i64> %val0, <2 x i64> %val1, ptr %ptrval) {
64+
; CHECK-LABEL: usub_v2i64:
65+
; CHECK: ; %bb.0:
66+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67+
; CHECK-NEXT: v_sub_co_u32_e32 v6, vcc, v2, v6
68+
; CHECK-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v7, vcc
69+
; CHECK-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v4
70+
; CHECK-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v5, vcc
71+
; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[0:1]
72+
; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
73+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
74+
; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
75+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
76+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
77+
; CHECK-NEXT: v_mov_b32_e32 v3, v2
78+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
79+
; CHECK-NEXT: s_setpc_b64 s[30:31]
80+
  %pair = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> %val0, <2 x i64> %val1)
81+
  %val = extractvalue {<2 x i64>, <2 x i1>} %pair, 0
82+
  %obit = extractvalue {<2 x i64>, <2 x i1>} %pair, 1
83+
  %res = sext <2 x i1> %obit to <2 x i64>
84+
  store <2 x i64> %val, ptr %ptrval
85+
  ret <2 x i64> %res
86+
}
87+
88+
;; Scalar unsigned add with overflow of two variable operands: stores the i64 sum to
;; %ptrval and returns the overflow bit sign-extended to i64 (-1 on overflow, else 0).
;; The current output recomputes the overflow with v_cmp_lt_u64 after the add/addc pair.
define i64 @uadd_i64(i64 %val0, i64 %val1, ptr %ptrval) {
89+
; CHECK-LABEL: uadd_i64:
90+
; CHECK: ; %bb.0:
91+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
93+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
94+
; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
95+
; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3]
96+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
97+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
98+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
99+
; CHECK-NEXT: s_setpc_b64 s[30:31]
100+
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val0, i64 %val1)
101+
  %val = extractvalue {i64, i1} %pair, 0
102+
  %obit = extractvalue {i64, i1} %pair, 1
103+
  %res = sext i1 %obit to i64
104+
  store i64 %val, ptr %ptrval
105+
  ret i64 %res
106+
}
107+
108+
;; Unsigned add with overflow of %val0 and the constant 1: stores the sum to %ptrval and
;; returns the overflow bit sign-extended to i64. %val1 is not used by the body.
;; The current output tests overflow by comparing the 64-bit sum against zero (v_cmp_eq_u64).
define i64 @uadd_p1(i64 %val0, i64 %val1, ptr %ptrval) {
109+
; CHECK-LABEL: uadd_p1:
110+
; CHECK: ; %bb.0:
111+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
113+
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
114+
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
115+
; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
116+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
117+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
118+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
119+
; CHECK-NEXT: s_setpc_b64 s[30:31]
120+
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val0, i64 1)
121+
  %val = extractvalue {i64, i1} %pair, 0
122+
  %obit = extractvalue {i64, i1} %pair, 1
123+
  %res = sext i1 %obit to i64
124+
  store i64 %val, ptr %ptrval
125+
  ret i64 %res
126+
}
127+
128+
;; Unsigned add with overflow of %val0 and the constant -1 (all ones): stores the sum to
;; %ptrval and returns the overflow bit sign-extended to i64. %val1 is not used by the body.
;; The current output tests overflow by comparing the original input against zero
;; (v_cmp_ne_u64 on v[0:1], which still holds %val0 because the sum goes to v[2:3]).
define i64 @uadd_n1(i64 %val0, i64 %val1, ptr %ptrval) {
129+
; CHECK-LABEL: uadd_n1:
130+
; CHECK: ; %bb.0:
131+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, -1, v0
133+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v1, vcc
134+
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
135+
; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3]
136+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
137+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
138+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
139+
; CHECK-NEXT: s_setpc_b64 s[30:31]
140+
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val0, i64 -1)
141+
  %val = extractvalue {i64, i1} %pair, 0
142+
  %obit = extractvalue {i64, i1} %pair, 1
143+
  %res = sext i1 %obit to i64
144+
  store i64 %val, ptr %ptrval
145+
  ret i64 %res
146+
}
147+
148+
;; Unsigned subtract with overflow of the constant 1 from %val0: stores the difference to
;; %ptrval and returns the overflow bit sign-extended to i64. %val1 is not used by the body.
;; The current output emits the subtract as an add of -1 and then recomputes the overflow
;; with v_cmp_gt_u64 (result > input).
define i64 @usub_p1(i64 %val0, i64 %val1, ptr %ptrval) {
149+
; CHECK-LABEL: usub_p1:
150+
; CHECK: ; %bb.0:
151+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, -1, v0
153+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v1, vcc
154+
; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
155+
; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3]
156+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
157+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
158+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
159+
; CHECK-NEXT: s_setpc_b64 s[30:31]
160+
  %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val0, i64 1)
161+
  %val = extractvalue {i64, i1} %pair, 0
162+
  %obit = extractvalue {i64, i1} %pair, 1
163+
  %res = sext i1 %obit to i64
164+
  store i64 %val, ptr %ptrval
165+
  ret i64 %res
166+
}
167+
168+
;; Unsigned subtract with overflow of the constant -1 (all ones) from %val0: stores the
;; difference to %ptrval and returns the overflow bit sign-extended to i64. %val1 is not
;; used by the body.
;; The current output emits the subtract as an add of 1 and then recomputes the overflow
;; with v_cmp_gt_u64 (result > input).
define i64 @usub_n1(i64 %val0, i64 %val1, ptr %ptrval) {
169+
; CHECK-LABEL: usub_n1:
170+
; CHECK: ; %bb.0:
171+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, 1, v0
173+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
174+
; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
175+
; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3]
176+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
177+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
178+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
179+
; CHECK-NEXT: s_setpc_b64 s[30:31]
180+
  %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val0, i64 -1)
181+
  %val = extractvalue {i64, i1} %pair, 0
182+
  %obit = extractvalue {i64, i1} %pair, 1
183+
  %res = sext i1 %obit to i64
184+
  store i64 %val, ptr %ptrval
185+
  ret i64 %res
186+
}

0 commit comments

Comments
 (0)