Skip to content

Commit f3ea403

Browse files
arsenm authored and Debadri Basak committed
AMDGPU: Add baseline test for llvm#161651 (llvm#165921)
1 parent 2fea151 commit f3ea403

File tree

1 file changed

+236
-0
lines changed

1 file changed

+236
-0
lines changed
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
4+
5+
; Baseline for umin(%a - %b, %a) on i16 with ordinary (non-inreg) arguments.
; The autogenerated checks below record a 16-bit subtract followed by a
; 16-bit unsigned min for both run lines.
define i16 @v_underflow_compare_fold_i16(i16 %a, i16 %b) #0 {
6+
; GFX9-LABEL: v_underflow_compare_fold_i16:
7+
; GFX9: ; %bb.0:
8+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; GFX9-NEXT: v_sub_u16_e32 v1, v0, v1
10+
; GFX9-NEXT: v_min_u16_e32 v0, v1, v0
11+
; GFX9-NEXT: s_setpc_b64 s[30:31]
12+
;
13+
; GFX11-LABEL: v_underflow_compare_fold_i16:
14+
; GFX11: ; %bb.0:
15+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16+
; GFX11-NEXT: v_sub_nc_u16 v0.h, v0.l, v1.l
17+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
18+
; GFX11-NEXT: v_min_u16 v0.l, v0.h, v0.l
19+
; GFX11-NEXT: s_setpc_b64 s[30:31]
20+
%sub = sub i16 %a, %b
21+
%cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
22+
ret i16 %cond
23+
}
24+
25+
; Baseline for umin(%a - %b, %a) on i32 with ordinary (non-inreg) arguments.
; The autogenerated checks below record a 32-bit subtract followed by a
; 32-bit unsigned min for both run lines.
define i32 @v_underflow_compare_fold_i32(i32 %a, i32 %b) #0 {
26+
; GFX9-LABEL: v_underflow_compare_fold_i32:
27+
; GFX9: ; %bb.0:
28+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
30+
; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
31+
; GFX9-NEXT: s_setpc_b64 s[30:31]
32+
;
33+
; GFX11-LABEL: v_underflow_compare_fold_i32:
34+
; GFX11: ; %bb.0:
35+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
37+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
38+
; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
39+
; GFX11-NEXT: s_setpc_b64 s[30:31]
40+
%sub = sub i32 %a, %b
41+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
42+
ret i32 %cond
43+
}
44+
45+
; Commuted i32 variant: the umin operands are swapped, i.e. umin(%a, %a - %b).
; The recorded min instruction likewise has its two source operands in the
; opposite order from the non-commuted i32 test.
define i32 @v_underflow_compare_fold_i32_commute(i32 %a, i32 %b) #0 {
46+
; GFX9-LABEL: v_underflow_compare_fold_i32_commute:
47+
; GFX9: ; %bb.0:
48+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49+
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
50+
; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
51+
; GFX9-NEXT: s_setpc_b64 s[30:31]
52+
;
53+
; GFX11-LABEL: v_underflow_compare_fold_i32_commute:
54+
; GFX11: ; %bb.0:
55+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
57+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
58+
; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
59+
; GFX11-NEXT: s_setpc_b64 s[30:31]
60+
%sub = sub i32 %a, %b
61+
%cond = call i32 @llvm.umin.i32(i32 %a, i32 %sub)
62+
ret i32 %cond
63+
}
64+
65+
; Multi-use i32 variant: the subtraction result feeds both the umin and a
; store to the addrspace(1) pointer %ptr, so %sub has a use besides the min.
; The recorded output keeps the subtract, the min, and a global store.
define i32 @v_underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) #0 {
66+
; GFX9-LABEL: v_underflow_compare_fold_i32_multi_use:
67+
; GFX9: ; %bb.0:
68+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69+
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1
70+
; GFX9-NEXT: v_min_u32_e32 v0, v1, v0
71+
; GFX9-NEXT: global_store_dword v[2:3], v1, off
72+
; GFX9-NEXT: s_waitcnt vmcnt(0)
73+
; GFX9-NEXT: s_setpc_b64 s[30:31]
74+
;
75+
; GFX11-LABEL: v_underflow_compare_fold_i32_multi_use:
76+
; GFX11: ; %bb.0:
77+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1
79+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
80+
; GFX11-NEXT: v_min_u32_e32 v0, v1, v0
81+
; GFX11-NEXT: global_store_b32 v[2:3], v1, off
82+
; GFX11-NEXT: s_setpc_b64 s[30:31]
83+
%sub = sub i32 %a, %b
84+
; Extra use of %sub in addition to the umin below.
store i32 %sub, ptr addrspace(1) %ptr
85+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
86+
ret i32 %cond
87+
}
88+
89+
; Baseline for umin(%a - %b, %a) on i64 with ordinary (non-inreg) arguments.
; The recorded output has no 64-bit min instruction: it is a subtract with
; carry across the two halves, a 64-bit unsigned less-than compare, and a
; conditional select per 32-bit half.
define i64 @v_underflow_compare_fold_i64(i64 %a, i64 %b) #0 {
90+
; GFX9-LABEL: v_underflow_compare_fold_i64:
91+
; GFX9: ; %bb.0:
92+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93+
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
94+
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
95+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
96+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
97+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
98+
; GFX9-NEXT: s_setpc_b64 s[30:31]
99+
;
100+
; GFX11-LABEL: v_underflow_compare_fold_i64:
101+
; GFX11: ; %bb.0:
102+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103+
; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
104+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
105+
; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
106+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
107+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
108+
; GFX11-NEXT: s_setpc_b64 s[30:31]
109+
%sub = sub i64 %a, %b
110+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
111+
ret i64 %cond
112+
}
113+
114+
; Commuted i64 variant: the umin operands are swapped, i.e. umin(%a, %a - %b).
; The recorded compare and selects have their operands in the opposite order
; from the non-commuted i64 test.
define i64 @v_underflow_compare_fold_i64_commute(i64 %a, i64 %b) #0 {
115+
; GFX9-LABEL: v_underflow_compare_fold_i64_commute:
116+
; GFX9: ; %bb.0:
117+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118+
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
119+
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
120+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
121+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
122+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
123+
; GFX9-NEXT: s_setpc_b64 s[30:31]
124+
;
125+
; GFX11-LABEL: v_underflow_compare_fold_i64_commute:
126+
; GFX11: ; %bb.0:
127+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128+
; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
129+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
130+
; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
131+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
132+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
133+
; GFX11-NEXT: s_setpc_b64 s[30:31]
134+
%sub = sub i64 %a, %b
135+
%cond = call i64 @llvm.umin.i64(i64 %a, i64 %sub)
136+
ret i64 %cond
137+
}
138+
139+
; Multi-use i64 variant: the subtraction result feeds both the umin and a
; store to the addrspace(1) pointer %ptr, so %sub has a use besides the min.
; The recorded output keeps the subtract, compare, selects, and a 64-bit
; global store of the difference.
define i64 @v_underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) #0 {
140+
; GFX9-LABEL: v_underflow_compare_fold_i64_multi_use:
141+
; GFX9: ; %bb.0:
142+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143+
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
144+
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
145+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
146+
; GFX9-NEXT: global_store_dwordx2 v[4:5], v[2:3], off
147+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
148+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
149+
; GFX9-NEXT: s_waitcnt vmcnt(0)
150+
; GFX9-NEXT: s_setpc_b64 s[30:31]
151+
;
152+
; GFX11-LABEL: v_underflow_compare_fold_i64_multi_use:
153+
; GFX11: ; %bb.0:
154+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155+
; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
156+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
157+
; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
158+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
159+
; GFX11-NEXT: global_store_b64 v[4:5], v[2:3], off
160+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
161+
; GFX11-NEXT: s_setpc_b64 s[30:31]
162+
%sub = sub i64 %a, %b
163+
; Extra use of %sub in addition to the umin below.
store i64 %sub, ptr addrspace(1) %ptr
164+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
165+
ret i64 %cond
166+
}
167+
168+
; i16 umin(%a - %b, %a) in an amdgpu_ps function whose arguments are inreg.
; The recorded output uses s-register instructions only: a 32-bit subtract,
; a 0xffff mask applied to both the difference and %a, then a 32-bit
; unsigned min.
define amdgpu_ps i16 @s_underflow_compare_fold_i16(i16 inreg %a, i16 inreg %b) #0 {
169+
; GFX9-LABEL: s_underflow_compare_fold_i16:
170+
; GFX9: ; %bb.0:
171+
; GFX9-NEXT: s_sub_i32 s1, s0, s1
172+
; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
173+
; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
174+
; GFX9-NEXT: s_min_u32 s0, s1, s0
175+
; GFX9-NEXT: ; return to shader part epilog
176+
;
177+
; GFX11-LABEL: s_underflow_compare_fold_i16:
178+
; GFX11: ; %bb.0:
179+
; GFX11-NEXT: s_sub_i32 s1, s0, s1
180+
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
181+
; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
182+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
183+
; GFX11-NEXT: s_min_u32 s0, s1, s0
184+
; GFX11-NEXT: ; return to shader part epilog
185+
%sub = sub i16 %a, %b
186+
%cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
187+
ret i16 %cond
188+
}
189+
190+
; i32 umin(%a - %b, %a) in an amdgpu_ps function whose arguments are inreg.
; The recorded output is an s-register subtract followed by an s-register
; unsigned min.
define amdgpu_ps i32 @s_underflow_compare_fold_i32(i32 inreg %a, i32 inreg %b) #0 {
191+
; GFX9-LABEL: s_underflow_compare_fold_i32:
192+
; GFX9: ; %bb.0:
193+
; GFX9-NEXT: s_sub_i32 s1, s0, s1
194+
; GFX9-NEXT: s_min_u32 s0, s1, s0
195+
; GFX9-NEXT: ; return to shader part epilog
196+
;
197+
; GFX11-LABEL: s_underflow_compare_fold_i32:
198+
; GFX11: ; %bb.0:
199+
; GFX11-NEXT: s_sub_i32 s1, s0, s1
200+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
201+
; GFX11-NEXT: s_min_u32 s0, s1, s0
202+
; GFX11-NEXT: ; return to shader part epilog
203+
%sub = sub i32 %a, %b
204+
%cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
205+
ret i32 %cond
206+
}
207+
208+
; i64 umin(%a - %b, %a) in an amdgpu_ps function whose arguments are inreg.
; The recorded output subtracts with borrow in s-registers, performs the
; 64-bit unsigned compare with a v_cmp instruction, and then selects each
; 32-bit half with s_cselect. On the gfx900 run line %a is first copied into
; v-registers for the compare.
define amdgpu_ps i64 @s_underflow_compare_fold_i64(i64 inreg %a, i64 inreg %b) #0 {
209+
; GFX9-LABEL: s_underflow_compare_fold_i64:
210+
; GFX9: ; %bb.0:
211+
; GFX9-NEXT: s_sub_u32 s2, s0, s2
212+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
213+
; GFX9-NEXT: s_subb_u32 s3, s1, s3
214+
; GFX9-NEXT: v_mov_b32_e32 v1, s1
215+
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
216+
; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec
217+
; GFX9-NEXT: s_cselect_b32 s1, s3, s1
218+
; GFX9-NEXT: s_cselect_b32 s0, s2, s0
219+
; GFX9-NEXT: ; return to shader part epilog
220+
;
221+
; GFX11-LABEL: s_underflow_compare_fold_i64:
222+
; GFX11: ; %bb.0:
223+
; GFX11-NEXT: s_sub_u32 s2, s0, s2
224+
; GFX11-NEXT: s_subb_u32 s3, s1, s3
225+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
226+
; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[2:3], s[0:1]
227+
; GFX11-NEXT: s_and_b32 s4, s4, exec_lo
228+
; GFX11-NEXT: s_cselect_b32 s0, s2, s0
229+
; GFX11-NEXT: s_cselect_b32 s1, s3, s1
230+
; GFX11-NEXT: ; return to shader part epilog
231+
%sub = sub i64 %a, %b
232+
%cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
233+
ret i64 %cond
234+
}
235+
236+
; Attribute group #0 is referenced by every function definition in this test.
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)