Skip to content

Commit 726c049

Browse files
authored
AMDGPU: Add baseline test for nofpclass on call results (llvm#167263)
1 parent 741ba82 commit 726c049

File tree

1 file changed

+199
-0
lines changed

1 file changed

+199
-0
lines changed
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Check that nofpclass attributes on call results are used by
5+
; SelectionDAG.
6+
7+
; Callee for call_nofpclass_funcs_f32: performs a volatile load of a float
; through the addrspace(1) pointer argument and returns the loaded value.
define internal float @func_f32(ptr addrspace(1) %ptr) {
8+
; CHECK-LABEL: func_f32:
9+
; CHECK: ; %bb.0:
10+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
12+
; CHECK-NEXT: s_waitcnt vmcnt(0)
13+
; CHECK-NEXT: s_setpc_b64 s[30:31]
14+
%ld = load volatile float, ptr addrspace(1) %ptr
15+
ret float %ld
16+
}
17+
18+
; Calls @func_f32 twice on the same pointer, marking each call result
; nofpclass(nan), and returns llvm.minnum.f32 of the two results.
; The expected output below applies v_max_f32 x, x to each call result
; before the v_min_f32.
; NOTE(review): those self-max instructions look like NaN quieting that the
; nofpclass(nan) results should make unnecessary -- confirm this is the
; intended baseline output.
define float @call_nofpclass_funcs_f32(ptr addrspace(1) %ptr) {
19+
; CHECK-LABEL: call_nofpclass_funcs_f32:
20+
; CHECK: ; %bb.0:
21+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22+
; CHECK-NEXT: s_mov_b32 s18, s33
23+
; CHECK-NEXT: s_mov_b32 s33, s32
24+
; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
25+
; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill
26+
; CHECK-NEXT: s_mov_b64 exec, s[16:17]
27+
; CHECK-NEXT: s_addk_i32 s32, 0x400
28+
; CHECK-NEXT: v_writelane_b32 v4, s30, 0
29+
; CHECK-NEXT: s_getpc_b64 s[16:17]
30+
; CHECK-NEXT: s_add_u32 s16, s16, func_f32@rel32@lo+4
31+
; CHECK-NEXT: s_addc_u32 s17, s17, func_f32@rel32@hi+12
32+
; CHECK-NEXT: v_writelane_b32 v4, s31, 1
33+
; CHECK-NEXT: v_mov_b32_e32 v2, v0
34+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
35+
; CHECK-NEXT: v_mov_b32_e32 v3, v0
36+
; CHECK-NEXT: v_mov_b32_e32 v0, v2
37+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
38+
; CHECK-NEXT: v_max_f32_e32 v1, v3, v3
39+
; CHECK-NEXT: v_max_f32_e32 v0, v0, v0
40+
; CHECK-NEXT: v_min_f32_e32 v0, v1, v0
41+
; CHECK-NEXT: v_readlane_b32 s31, v4, 1
42+
; CHECK-NEXT: v_readlane_b32 s30, v4, 0
43+
; CHECK-NEXT: s_mov_b32 s32, s33
44+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
45+
; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload
46+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
47+
; CHECK-NEXT: s_mov_b32 s33, s18
48+
; CHECK-NEXT: s_waitcnt vmcnt(0)
49+
; CHECK-NEXT: s_setpc_b64 s[30:31]
50+
%call0 = call nofpclass(nan) float @func_f32(ptr addrspace(1) %ptr)
51+
%call1 = call nofpclass(nan) float @func_f32(ptr addrspace(1) %ptr)
52+
%min = call float @llvm.minnum.f32(float %call0, float %call1)
53+
ret float %min
54+
}
55+
56+
; Callee for call_nofpclass_funcs_v2f32: performs a volatile load of a
; <2 x float> through the addrspace(1) pointer argument and returns it.
define internal <2 x float> @func_v2f32(ptr addrspace(1) %ptr) {
57+
; CHECK-LABEL: func_v2f32:
58+
; CHECK: ; %bb.0:
59+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
61+
; CHECK-NEXT: s_waitcnt vmcnt(0)
62+
; CHECK-NEXT: s_setpc_b64 s[30:31]
63+
%ld = load volatile <2 x float>, ptr addrspace(1) %ptr
64+
ret <2 x float> %ld
65+
}
66+
67+
; Vector variant of the f32 call test: calls @func_v2f32 twice on the same
; pointer, marking each call result nofpclass(nan), and returns
; llvm.minnum.v2f32 of the two results.
; The expected output below handles the two elements separately, each as a
; pair of v_max_f32 x, x instructions followed by a v_min_f32.
; NOTE(review): the self-max instructions look like NaN quieting that the
; nofpclass(nan) results should make unnecessary -- confirm this is the
; intended baseline output.
define <2 x float> @call_nofpclass_funcs_v2f32(ptr addrspace(1) %ptr) {
68+
; CHECK-LABEL: call_nofpclass_funcs_v2f32:
69+
; CHECK: ; %bb.0:
70+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71+
; CHECK-NEXT: s_mov_b32 s18, s33
72+
; CHECK-NEXT: s_mov_b32 s33, s32
73+
; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
74+
; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill
75+
; CHECK-NEXT: s_mov_b64 exec, s[16:17]
76+
; CHECK-NEXT: s_addk_i32 s32, 0x400
77+
; CHECK-NEXT: v_writelane_b32 v6, s30, 0
78+
; CHECK-NEXT: s_getpc_b64 s[16:17]
79+
; CHECK-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4
80+
; CHECK-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12
81+
; CHECK-NEXT: v_writelane_b32 v6, s31, 1
82+
; CHECK-NEXT: v_mov_b32_e32 v2, v1
83+
; CHECK-NEXT: v_mov_b32_e32 v3, v0
84+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
85+
; CHECK-NEXT: v_mov_b32_e32 v4, v0
86+
; CHECK-NEXT: v_mov_b32_e32 v5, v1
87+
; CHECK-NEXT: v_mov_b32_e32 v0, v3
88+
; CHECK-NEXT: v_mov_b32_e32 v1, v2
89+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
90+
; CHECK-NEXT: v_max_f32_e32 v2, v4, v4
91+
; CHECK-NEXT: v_max_f32_e32 v0, v0, v0
92+
; CHECK-NEXT: v_min_f32_e32 v0, v2, v0
93+
; CHECK-NEXT: v_max_f32_e32 v2, v5, v5
94+
; CHECK-NEXT: v_max_f32_e32 v1, v1, v1
95+
; CHECK-NEXT: v_min_f32_e32 v1, v2, v1
96+
; CHECK-NEXT: v_readlane_b32 s31, v6, 1
97+
; CHECK-NEXT: v_readlane_b32 s30, v6, 0
98+
; CHECK-NEXT: s_mov_b32 s32, s33
99+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
100+
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
101+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
102+
; CHECK-NEXT: s_mov_b32 s33, s18
103+
; CHECK-NEXT: s_waitcnt vmcnt(0)
104+
; CHECK-NEXT: s_setpc_b64 s[30:31]
105+
%call0 = call nofpclass(nan) <2 x float> @func_v2f32(ptr addrspace(1) %ptr)
106+
%call1 = call nofpclass(nan) <2 x float> @func_v2f32(ptr addrspace(1) %ptr)
107+
%min = call <2 x float> @llvm.minnum.v2f32(<2 x float> %call0, <2 x float> %call1)
108+
ret <2 x float> %min
109+
}
110+
111+
; Callee for call_nofpclass_funcs_f64: performs a volatile load of a double
; through the addrspace(1) pointer argument and returns the loaded value.
define internal double @func_f64(ptr addrspace(1) %ptr) {
112+
; CHECK-LABEL: func_f64:
113+
; CHECK: ; %bb.0:
114+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
116+
; CHECK-NEXT: s_waitcnt vmcnt(0)
117+
; CHECK-NEXT: s_setpc_b64 s[30:31]
118+
%ld = load volatile double, ptr addrspace(1) %ptr
119+
ret double %ld
120+
}
121+
122+
; f64 variant of the call test: calls @func_f64 twice on the same pointer,
; marking each call result nofpclass(nan), and returns llvm.minnum.f64 of
; the two results.
; The expected output below applies v_max_f64 x, x to each call result
; before the v_min_f64.
; NOTE(review): those self-max instructions look like NaN quieting that the
; nofpclass(nan) results should make unnecessary -- confirm this is the
; intended baseline output.
define double @call_nofpclass_funcs_f64(ptr addrspace(1) %ptr) {
123+
; CHECK-LABEL: call_nofpclass_funcs_f64:
124+
; CHECK: ; %bb.0:
125+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126+
; CHECK-NEXT: s_mov_b32 s18, s33
127+
; CHECK-NEXT: s_mov_b32 s33, s32
128+
; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
129+
; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill
130+
; CHECK-NEXT: s_mov_b64 exec, s[16:17]
131+
; CHECK-NEXT: s_addk_i32 s32, 0x400
132+
; CHECK-NEXT: v_writelane_b32 v6, s30, 0
133+
; CHECK-NEXT: s_getpc_b64 s[16:17]
134+
; CHECK-NEXT: s_add_u32 s16, s16, func_f64@rel32@lo+4
135+
; CHECK-NEXT: s_addc_u32 s17, s17, func_f64@rel32@hi+12
136+
; CHECK-NEXT: v_writelane_b32 v6, s31, 1
137+
; CHECK-NEXT: v_mov_b32_e32 v4, v1
138+
; CHECK-NEXT: v_mov_b32_e32 v5, v0
139+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
140+
; CHECK-NEXT: v_mov_b32_e32 v2, v0
141+
; CHECK-NEXT: v_mov_b32_e32 v3, v1
142+
; CHECK-NEXT: v_mov_b32_e32 v0, v5
143+
; CHECK-NEXT: v_mov_b32_e32 v1, v4
144+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
145+
; CHECK-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
146+
; CHECK-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
147+
; CHECK-NEXT: v_readlane_b32 s31, v6, 1
148+
; CHECK-NEXT: v_readlane_b32 s30, v6, 0
149+
; CHECK-NEXT: s_mov_b32 s32, s33
150+
; CHECK-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
151+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
152+
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
153+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
154+
; CHECK-NEXT: s_mov_b32 s33, s18
155+
; CHECK-NEXT: s_waitcnt vmcnt(0)
156+
; CHECK-NEXT: s_setpc_b64 s[30:31]
157+
%call0 = call nofpclass(nan) double @func_f64(ptr addrspace(1) %ptr)
158+
%call1 = call nofpclass(nan) double @func_f64(ptr addrspace(1) %ptr)
159+
%min = call double @llvm.minnum.f64(double %call0, double %call1)
160+
ret double %min
161+
}
162+
163+
; Intrinsic variant: applies llvm.amdgcn.sqrt.f32 to %x and to %y, marking
; each result nofpclass(nan), then returns %call0 when it compares
; ordered-less-than %call1 and %call1 otherwise (fcmp olt feeding a select
; that carries the nsz flag). %z is not used by the body.
; The expected output below keeps a separate v_cmp_lt_f32 and v_cndmask_b32
; instead of a single min instruction.
; NOTE(review): presumably the compare+select is meant to become a min once
; nofpclass(nan) on the intrinsic results is honored -- confirm.
define float @call_nofpclass_intrinsic_f32(float %x, float %y, float %z) {
164+
; CHECK-LABEL: call_nofpclass_intrinsic_f32:
165+
; CHECK: ; %bb.0:
166+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167+
; CHECK-NEXT: v_sqrt_f32_e32 v0, v0
168+
; CHECK-NEXT: v_sqrt_f32_e32 v1, v1
169+
; CHECK-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
170+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
171+
; CHECK-NEXT: s_setpc_b64 s[30:31]
172+
%call0 = call nofpclass(nan) float @llvm.amdgcn.sqrt.f32(float %x)
173+
%call1 = call nofpclass(nan) float @llvm.amdgcn.sqrt.f32(float %y)
174+
%lt = fcmp olt float %call0, %call1
175+
%min = select nsz i1 %lt, float %call0, float %call1
176+
ret float %min
177+
}
178+
179+
; Vector intrinsic variant: produces two <2 x half> values with
; llvm.amdgcn.cvt.pkrtz, one from (%x, %y) and one from (%z, %w), marking
; each result nofpclass(nan). It then selects element-wise between them
; using fcmp olt feeding a select that carries the nsz flag.
; The expected output below handles the two halves separately: one
; v_cmp_lt_f16 / v_cndmask_b32 pair on v0/v1 and another on copies shifted
; right by 16, with the two results recombined by v_perm_b32.
; NOTE(review): presumably the compare+select is meant to become a min once
; nofpclass(nan) on the intrinsic results is honored -- confirm.
define <2 x half> @call_nofpclass_intrinsic_v2f16(float %x, float %y, float %z, float %w) {
180+
; CHECK-LABEL: call_nofpclass_intrinsic_v2f16:
181+
; CHECK: ; %bb.0:
182+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183+
; CHECK-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
184+
; CHECK-NEXT: v_cvt_pkrtz_f16_f32 v1, v2, v3
185+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 16, v1
186+
; CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0
187+
; CHECK-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
188+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
189+
; CHECK-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
190+
; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
191+
; CHECK-NEXT: s_mov_b32 s4, 0x5040100
192+
; CHECK-NEXT: v_perm_b32 v0, v1, v0, s4
193+
; CHECK-NEXT: s_setpc_b64 s[30:31]
194+
%call0 = call nofpclass(nan) <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
195+
%call1 = call nofpclass(nan) <2 x half> @llvm.amdgcn.cvt.pkrtz(float %z, float %w)
196+
%lt = fcmp olt <2 x half> %call0, %call1
197+
%min = select nsz <2 x i1> %lt, <2 x half> %call0, <2 x half> %call1
198+
ret <2 x half> %min
199+
}

0 commit comments

Comments
 (0)