Skip to content

Commit fff0273

Browse files
committed
update testcase (test with and without legal 16-bit operations)
1 parent b127081 commit fff0273

File tree

1 file changed

+223
-1
lines changed

1 file changed

+223
-1
lines changed

llvm/test/CodeGen/AMDGPU/fptoui_uitofp.ll

Lines changed: 223 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc < %s -mtriple=amdgcn | FileCheck %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
34

45
define amdgpu_kernel void @fptoui_f32_to_i16_to_f32(ptr addrspace(1) %out, float %x) {
56
; CHECK-LABEL: fptoui_f32_to_i16_to_f32:
@@ -12,6 +13,26 @@ define amdgpu_kernel void @fptoui_f32_to_i16_to_f32(ptr addrspace(1) %out, float
1213
; CHECK-NEXT: v_trunc_f32_e64 v0, |s6|
1314
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
1415
; CHECK-NEXT: s_endpgm
16+
; GFX6-LABEL: fptoui_f32_to_i16_to_f32:
17+
; GFX6: ; %bb.0: ; %entry
18+
; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb
19+
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
20+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
21+
; GFX6-NEXT: s_mov_b32 s2, -1
22+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
23+
; GFX6-NEXT: v_trunc_f32_e64 v0, |s6|
24+
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
25+
; GFX6-NEXT: s_endpgm
26+
;
27+
; GFX9-LABEL: fptoui_f32_to_i16_to_f32:
28+
; GFX9: ; %bb.0: ; %entry
29+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
30+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
31+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
32+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
33+
; GFX9-NEXT: v_trunc_f32_e64 v1, |s2|
34+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
35+
; GFX9-NEXT: s_endpgm
1536
entry:
1637
%ui = fptoui float %x to i16
1738
%fp = uitofp i16 %ui to float
@@ -30,6 +51,26 @@ define amdgpu_kernel void @fptoui_f32_to_i32_to_f32(ptr addrspace(1) %out, float
3051
; CHECK-NEXT: v_trunc_f32_e64 v0, |s6|
3152
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
3253
; CHECK-NEXT: s_endpgm
54+
; GFX6-LABEL: fptoui_f32_to_i32_to_f32:
55+
; GFX6: ; %bb.0: ; %entry
56+
; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb
57+
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
58+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
59+
; GFX6-NEXT: s_mov_b32 s2, -1
60+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
61+
; GFX6-NEXT: v_trunc_f32_e64 v0, |s6|
62+
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
63+
; GFX6-NEXT: s_endpgm
64+
;
65+
; GFX9-LABEL: fptoui_f32_to_i32_to_f32:
66+
; GFX9: ; %bb.0: ; %entry
67+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
68+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
69+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
70+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
71+
; GFX9-NEXT: v_trunc_f32_e64 v1, |s2|
72+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
73+
; GFX9-NEXT: s_endpgm
3374
entry:
3475
%ui = fptoui float %x to i32
3576
%fp = uitofp i32 %ui to float
@@ -48,6 +89,26 @@ define amdgpu_kernel void @fptoui_f32_to_i64_to_f32(ptr addrspace(1) %out, float
4889
; CHECK-NEXT: v_trunc_f32_e64 v0, |s6|
4990
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
5091
; CHECK-NEXT: s_endpgm
92+
; GFX6-LABEL: fptoui_f32_to_i64_to_f32:
93+
; GFX6: ; %bb.0: ; %entry
94+
; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb
95+
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
96+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
97+
; GFX6-NEXT: s_mov_b32 s2, -1
98+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
99+
; GFX6-NEXT: v_trunc_f32_e64 v0, |s6|
100+
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
101+
; GFX6-NEXT: s_endpgm
102+
;
103+
; GFX9-LABEL: fptoui_f32_to_i64_to_f32:
104+
; GFX9: ; %bb.0: ; %entry
105+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
106+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
107+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
108+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
109+
; GFX9-NEXT: v_trunc_f32_e64 v1, |s2|
110+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
111+
; GFX9-NEXT: s_endpgm
51112
entry:
52113
%ui = fptoui float %x to i64
53114
%fp = uitofp i64 %ui to float
@@ -69,6 +130,30 @@ define amdgpu_kernel void @fptoui_f16_to_i16_to_f16(ptr addrspace(1) %out, half
69130
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
70131
; CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
71132
; CHECK-NEXT: s_endpgm
133+
; GFX6-LABEL: fptoui_f16_to_i16_to_f16:
134+
; GFX6: ; %bb.0: ; %entry
135+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb
136+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
137+
; GFX6-NEXT: s_mov_b32 s2, -1
138+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
139+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, s0
140+
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
141+
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
142+
; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0
143+
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
144+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
145+
; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
146+
; GFX6-NEXT: s_endpgm
147+
;
148+
; GFX9-LABEL: fptoui_f16_to_i16_to_f16:
149+
; GFX9: ; %bb.0: ; %entry
150+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
151+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
152+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
153+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
154+
; GFX9-NEXT: v_trunc_f16_e64 v1, |s2|
155+
; GFX9-NEXT: global_store_short v0, v1, s[0:1]
156+
; GFX9-NEXT: s_endpgm
72157
entry:
73158
%ui = fptoui half %x to i16
74159
%fp = uitofp i16 %ui to half
@@ -89,6 +174,29 @@ define amdgpu_kernel void @fptoui_f16_to_i32_to_f16(ptr addrspace(1) %out, half
89174
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
90175
; CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
91176
; CHECK-NEXT: s_endpgm
177+
; GFX6-LABEL: fptoui_f16_to_i32_to_f16:
178+
; GFX6: ; %bb.0: ; %entry
179+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb
180+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
181+
; GFX6-NEXT: s_mov_b32 s2, -1
182+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
183+
; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |s0|
184+
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
185+
; GFX6-NEXT: v_trunc_f32_e32 v0, v0
186+
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
187+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
188+
; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
189+
; GFX6-NEXT: s_endpgm
190+
;
191+
; GFX9-LABEL: fptoui_f16_to_i32_to_f16:
192+
; GFX9: ; %bb.0: ; %entry
193+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
194+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
195+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
196+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
197+
; GFX9-NEXT: v_trunc_f16_e64 v1, |s2|
198+
; GFX9-NEXT: global_store_short v0, v1, s[0:1]
199+
; GFX9-NEXT: s_endpgm
92200
entry:
93201
%ui = fptoui half %x to i32
94202
%fp = uitofp i32 %ui to half
@@ -109,6 +217,29 @@ define amdgpu_kernel void @fptoui_f16_to_i64_to_f16(ptr addrspace(1) %out, half
109217
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
110218
; CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
111219
; CHECK-NEXT: s_endpgm
220+
; GFX6-LABEL: fptoui_f16_to_i64_to_f16:
221+
; GFX6: ; %bb.0: ; %entry
222+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb
223+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
224+
; GFX6-NEXT: s_mov_b32 s2, -1
225+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
226+
; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |s0|
227+
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
228+
; GFX6-NEXT: v_trunc_f32_e32 v0, v0
229+
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
230+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
231+
; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
232+
; GFX6-NEXT: s_endpgm
233+
;
234+
; GFX9-LABEL: fptoui_f16_to_i64_to_f16:
235+
; GFX9: ; %bb.0: ; %entry
236+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
237+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
238+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
239+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
240+
; GFX9-NEXT: v_trunc_f16_e64 v1, |s2|
241+
; GFX9-NEXT: global_store_short v0, v1, s[0:1]
242+
; GFX9-NEXT: s_endpgm
112243
entry:
113244
%ui = fptoui half %x to i64
114245
%fp = uitofp i64 %ui to half
@@ -129,6 +260,25 @@ define amdgpu_kernel void @fptoui_f64_to_i16_to_f64(ptr addrspace(1) %out, doubl
129260
; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
130261
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
131262
; CHECK-NEXT: s_endpgm
263+
; GFX6-LABEL: fptoui_f64_to_i16_to_f64:
264+
; GFX6: ; %bb.0: ; %entry
265+
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
266+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
267+
; GFX6-NEXT: v_cvt_u32_f64_e32 v0, s[2:3]
268+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
269+
; GFX6-NEXT: s_mov_b32 s2, -1
270+
; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
271+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
272+
; GFX6-NEXT: s_endpgm
273+
;
274+
; GFX9-LABEL: fptoui_f64_to_i16_to_f64:
275+
; GFX9: ; %bb.0: ; %entry
276+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
277+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
278+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
279+
; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]|
280+
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
281+
; GFX9-NEXT: s_endpgm
132282
entry:
133283
%ui = fptoui double %x to i16
134284
%fp = uitofp i16 %ui to double
@@ -149,6 +299,25 @@ define amdgpu_kernel void @fptoui_f64_to_i32_to_f64(ptr addrspace(1) %out, doubl
149299
; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
150300
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
151301
; CHECK-NEXT: s_endpgm
302+
; GFX6-LABEL: fptoui_f64_to_i32_to_f64:
303+
; GFX6: ; %bb.0: ; %entry
304+
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
305+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
306+
; GFX6-NEXT: v_cvt_u32_f64_e32 v0, s[2:3]
307+
; GFX6-NEXT: s_mov_b32 s3, 0xf000
308+
; GFX6-NEXT: s_mov_b32 s2, -1
309+
; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
310+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
311+
; GFX6-NEXT: s_endpgm
312+
;
313+
; GFX9-LABEL: fptoui_f64_to_i32_to_f64:
314+
; GFX9: ; %bb.0: ; %entry
315+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
316+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
317+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
318+
; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]|
319+
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
320+
; GFX9-NEXT: s_endpgm
152321
entry:
153322
%ui = fptoui double %x to i32
154323
%fp = uitofp i32 %ui to double
@@ -201,6 +370,59 @@ define amdgpu_kernel void @fptoui_f64_to_i64_to_f64(ptr addrspace(1) %out, doubl
201370
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
202371
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
203372
; CHECK-NEXT: s_endpgm
373+
; GFX6-LABEL: fptoui_f64_to_i64_to_f64:
374+
; GFX6: ; %bb.0: ; %entry
375+
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
376+
; GFX6-NEXT: s_mov_b32 s6, -1
377+
; GFX6-NEXT: s_mov_b32 s5, 0xfffff
378+
; GFX6-NEXT: s_mov_b32 s4, s6
379+
; GFX6-NEXT: v_not_b32_e32 v0, 31
380+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
381+
; GFX6-NEXT: s_bfe_u32 s7, s3, 0xb0014
382+
; GFX6-NEXT: s_addk_i32 s7, 0xfc01
383+
; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s7
384+
; GFX6-NEXT: s_and_b32 s8, s3, 0x80000000
385+
; GFX6-NEXT: s_andn2_b64 s[4:5], s[2:3], s[4:5]
386+
; GFX6-NEXT: s_cmp_lt_i32 s7, 0
387+
; GFX6-NEXT: s_cselect_b32 s4, 0, s4
388+
; GFX6-NEXT: s_cselect_b32 s5, s8, s5
389+
; GFX6-NEXT: s_cmp_gt_i32 s7, 51
390+
; GFX6-NEXT: s_cselect_b32 s3, s3, s5
391+
; GFX6-NEXT: s_cselect_b32 s2, s2, s4
392+
; GFX6-NEXT: v_ldexp_f64 v[0:1], s[2:3], v0
393+
; GFX6-NEXT: v_mov_b32_e32 v4, -1
394+
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
395+
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
396+
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
397+
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
398+
; GFX6-NEXT: s_mov_b32 s4, 0
399+
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
400+
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
401+
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
402+
; GFX6-NEXT: v_mov_b32_e32 v2, s2
403+
; GFX6-NEXT: s_mov_b32 s5, 0xc1f00000
404+
; GFX6-NEXT: v_mov_b32_e32 v3, s3
405+
; GFX6-NEXT: v_fma_f64 v[2:3], v[0:1], s[4:5], v[2:3]
406+
; GFX6-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
407+
; GFX6-NEXT: v_cvt_u32_f64_e32 v2, v[2:3]
408+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
409+
; GFX6-NEXT: s_mov_b32 s4, s0
410+
; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
411+
; GFX6-NEXT: v_cvt_f64_u32_e32 v[2:3], v2
412+
; GFX6-NEXT: s_mov_b32 s5, s1
413+
; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
414+
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
415+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
416+
; GFX6-NEXT: s_endpgm
417+
;
418+
; GFX9-LABEL: fptoui_f64_to_i64_to_f64:
419+
; GFX9: ; %bb.0: ; %entry
420+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
421+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
422+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
423+
; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]|
424+
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
425+
; GFX9-NEXT: s_endpgm
204426
entry:
205427
%ui = fptoui double %x to i64
206428
%fp = uitofp i64 %ui to double

0 commit comments

Comments
 (0)