11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2- ; RUN: llc < %s -mtriple=amdgcn | FileCheck %s
2+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s
3+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
34
45define amdgpu_kernel void @fptoui_f32_to_i16_to_f32 (ptr addrspace (1 ) %out , float %x ) {
56; CHECK-LABEL: fptoui_f32_to_i16_to_f32:
@@ -12,6 +13,26 @@ define amdgpu_kernel void @fptoui_f32_to_i16_to_f32(ptr addrspace(1) %out, float
1213; CHECK-NEXT: v_trunc_f32_e64 v0, |s6|
1314; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
1415; CHECK-NEXT: s_endpgm
16+ ; GFX6-LABEL: fptoui_f32_to_i16_to_f32:
17+ ; GFX6: ; %bb.0: ; %entry
18+ ; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb
19+ ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
20+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
21+ ; GFX6-NEXT: s_mov_b32 s2, -1
22+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
23+ ; GFX6-NEXT: v_trunc_f32_e64 v0, |s6|
24+ ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
25+ ; GFX6-NEXT: s_endpgm
26+ ;
27+ ; GFX9-LABEL: fptoui_f32_to_i16_to_f32:
28+ ; GFX9: ; %bb.0: ; %entry
29+ ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
30+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
31+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
32+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
33+ ; GFX9-NEXT: v_trunc_f32_e64 v1, |s2|
34+ ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
35+ ; GFX9-NEXT: s_endpgm
1536entry:
1637 %ui = fptoui float %x to i16
1738 %fp = uitofp i16 %ui to float
@@ -30,6 +51,26 @@ define amdgpu_kernel void @fptoui_f32_to_i32_to_f32(ptr addrspace(1) %out, float
3051; CHECK-NEXT: v_trunc_f32_e64 v0, |s6|
3152; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
3253; CHECK-NEXT: s_endpgm
54+ ; GFX6-LABEL: fptoui_f32_to_i32_to_f32:
55+ ; GFX6: ; %bb.0: ; %entry
56+ ; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb
57+ ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
58+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
59+ ; GFX6-NEXT: s_mov_b32 s2, -1
60+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
61+ ; GFX6-NEXT: v_trunc_f32_e64 v0, |s6|
62+ ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
63+ ; GFX6-NEXT: s_endpgm
64+ ;
65+ ; GFX9-LABEL: fptoui_f32_to_i32_to_f32:
66+ ; GFX9: ; %bb.0: ; %entry
67+ ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
68+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
69+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
70+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
71+ ; GFX9-NEXT: v_trunc_f32_e64 v1, |s2|
72+ ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
73+ ; GFX9-NEXT: s_endpgm
3374entry:
3475 %ui = fptoui float %x to i32
3576 %fp = uitofp i32 %ui to float
@@ -48,6 +89,26 @@ define amdgpu_kernel void @fptoui_f32_to_i64_to_f32(ptr addrspace(1) %out, float
4889; CHECK-NEXT: v_trunc_f32_e64 v0, |s6|
4990; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
5091; CHECK-NEXT: s_endpgm
92+ ; GFX6-LABEL: fptoui_f32_to_i64_to_f32:
93+ ; GFX6: ; %bb.0: ; %entry
94+ ; GFX6-NEXT: s_load_dword s6, s[4:5], 0xb
95+ ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
96+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
97+ ; GFX6-NEXT: s_mov_b32 s2, -1
98+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
99+ ; GFX6-NEXT: v_trunc_f32_e64 v0, |s6|
100+ ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
101+ ; GFX6-NEXT: s_endpgm
102+ ;
103+ ; GFX9-LABEL: fptoui_f32_to_i64_to_f32:
104+ ; GFX9: ; %bb.0: ; %entry
105+ ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
106+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
107+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
108+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
109+ ; GFX9-NEXT: v_trunc_f32_e64 v1, |s2|
110+ ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
111+ ; GFX9-NEXT: s_endpgm
51112entry:
52113 %ui = fptoui float %x to i64
53114 %fp = uitofp i64 %ui to float
@@ -69,6 +130,30 @@ define amdgpu_kernel void @fptoui_f16_to_i16_to_f16(ptr addrspace(1) %out, half
69130; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
70131; CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
71132; CHECK-NEXT: s_endpgm
133+ ; GFX6-LABEL: fptoui_f16_to_i16_to_f16:
134+ ; GFX6: ; %bb.0: ; %entry
135+ ; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb
136+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
137+ ; GFX6-NEXT: s_mov_b32 s2, -1
138+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
139+ ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, s0
140+ ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
141+ ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
142+ ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0
143+ ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
144+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
145+ ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
146+ ; GFX6-NEXT: s_endpgm
147+ ;
148+ ; GFX9-LABEL: fptoui_f16_to_i16_to_f16:
149+ ; GFX9: ; %bb.0: ; %entry
150+ ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
151+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
152+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
153+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
154+ ; GFX9-NEXT: v_trunc_f16_e64 v1, |s2|
155+ ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
156+ ; GFX9-NEXT: s_endpgm
72157entry:
73158 %ui = fptoui half %x to i16
74159 %fp = uitofp i16 %ui to half
@@ -89,6 +174,29 @@ define amdgpu_kernel void @fptoui_f16_to_i32_to_f16(ptr addrspace(1) %out, half
89174; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
90175; CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
91176; CHECK-NEXT: s_endpgm
177+ ; GFX6-LABEL: fptoui_f16_to_i32_to_f16:
178+ ; GFX6: ; %bb.0: ; %entry
179+ ; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb
180+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
181+ ; GFX6-NEXT: s_mov_b32 s2, -1
182+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
183+ ; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |s0|
184+ ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
185+ ; GFX6-NEXT: v_trunc_f32_e32 v0, v0
186+ ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
187+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
188+ ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
189+ ; GFX6-NEXT: s_endpgm
190+ ;
191+ ; GFX9-LABEL: fptoui_f16_to_i32_to_f16:
192+ ; GFX9: ; %bb.0: ; %entry
193+ ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
194+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
195+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
196+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
197+ ; GFX9-NEXT: v_trunc_f16_e64 v1, |s2|
198+ ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
199+ ; GFX9-NEXT: s_endpgm
92200entry:
93201 %ui = fptoui half %x to i32
94202 %fp = uitofp i32 %ui to half
@@ -109,6 +217,29 @@ define amdgpu_kernel void @fptoui_f16_to_i64_to_f16(ptr addrspace(1) %out, half
109217; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
110218; CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
111219; CHECK-NEXT: s_endpgm
220+ ; GFX6-LABEL: fptoui_f16_to_i64_to_f16:
221+ ; GFX6: ; %bb.0: ; %entry
222+ ; GFX6-NEXT: s_load_dword s0, s[4:5], 0xb
223+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
224+ ; GFX6-NEXT: s_mov_b32 s2, -1
225+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
226+ ; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |s0|
227+ ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
228+ ; GFX6-NEXT: v_trunc_f32_e32 v0, v0
229+ ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
230+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
231+ ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
232+ ; GFX6-NEXT: s_endpgm
233+ ;
234+ ; GFX9-LABEL: fptoui_f16_to_i64_to_f16:
235+ ; GFX9: ; %bb.0: ; %entry
236+ ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
237+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
238+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
239+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
240+ ; GFX9-NEXT: v_trunc_f16_e64 v1, |s2|
241+ ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
242+ ; GFX9-NEXT: s_endpgm
112243entry:
113244 %ui = fptoui half %x to i64
114245 %fp = uitofp i64 %ui to half
@@ -129,6 +260,25 @@ define amdgpu_kernel void @fptoui_f64_to_i16_to_f64(ptr addrspace(1) %out, doubl
129260; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
130261; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
131262; CHECK-NEXT: s_endpgm
263+ ; GFX6-LABEL: fptoui_f64_to_i16_to_f64:
264+ ; GFX6: ; %bb.0: ; %entry
265+ ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
266+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
267+ ; GFX6-NEXT: v_cvt_u32_f64_e32 v0, s[2:3]
268+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
269+ ; GFX6-NEXT: s_mov_b32 s2, -1
270+ ; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
271+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
272+ ; GFX6-NEXT: s_endpgm
273+ ;
274+ ; GFX9-LABEL: fptoui_f64_to_i16_to_f64:
275+ ; GFX9: ; %bb.0: ; %entry
276+ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
277+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0
278+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
279+ ; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]|
280+ ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
281+ ; GFX9-NEXT: s_endpgm
132282entry:
133283 %ui = fptoui double %x to i16
134284 %fp = uitofp i16 %ui to double
@@ -149,6 +299,25 @@ define amdgpu_kernel void @fptoui_f64_to_i32_to_f64(ptr addrspace(1) %out, doubl
149299; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
150300; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
151301; CHECK-NEXT: s_endpgm
302+ ; GFX6-LABEL: fptoui_f64_to_i32_to_f64:
303+ ; GFX6: ; %bb.0: ; %entry
304+ ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
305+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
306+ ; GFX6-NEXT: v_cvt_u32_f64_e32 v0, s[2:3]
307+ ; GFX6-NEXT: s_mov_b32 s3, 0xf000
308+ ; GFX6-NEXT: s_mov_b32 s2, -1
309+ ; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
310+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
311+ ; GFX6-NEXT: s_endpgm
312+ ;
313+ ; GFX9-LABEL: fptoui_f64_to_i32_to_f64:
314+ ; GFX9: ; %bb.0: ; %entry
315+ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
316+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0
317+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
318+ ; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]|
319+ ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
320+ ; GFX9-NEXT: s_endpgm
152321entry:
153322 %ui = fptoui double %x to i32
154323 %fp = uitofp i32 %ui to double
@@ -201,6 +370,59 @@ define amdgpu_kernel void @fptoui_f64_to_i64_to_f64(ptr addrspace(1) %out, doubl
201370; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
202371; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
203372; CHECK-NEXT: s_endpgm
373+ ; GFX6-LABEL: fptoui_f64_to_i64_to_f64:
374+ ; GFX6: ; %bb.0: ; %entry
375+ ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
376+ ; GFX6-NEXT: s_mov_b32 s6, -1
377+ ; GFX6-NEXT: s_mov_b32 s5, 0xfffff
378+ ; GFX6-NEXT: s_mov_b32 s4, s6
379+ ; GFX6-NEXT: v_not_b32_e32 v0, 31
380+ ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
381+ ; GFX6-NEXT: s_bfe_u32 s7, s3, 0xb0014
382+ ; GFX6-NEXT: s_addk_i32 s7, 0xfc01
383+ ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s7
384+ ; GFX6-NEXT: s_and_b32 s8, s3, 0x80000000
385+ ; GFX6-NEXT: s_andn2_b64 s[4:5], s[2:3], s[4:5]
386+ ; GFX6-NEXT: s_cmp_lt_i32 s7, 0
387+ ; GFX6-NEXT: s_cselect_b32 s4, 0, s4
388+ ; GFX6-NEXT: s_cselect_b32 s5, s8, s5
389+ ; GFX6-NEXT: s_cmp_gt_i32 s7, 51
390+ ; GFX6-NEXT: s_cselect_b32 s3, s3, s5
391+ ; GFX6-NEXT: s_cselect_b32 s2, s2, s4
392+ ; GFX6-NEXT: v_ldexp_f64 v[0:1], s[2:3], v0
393+ ; GFX6-NEXT: v_mov_b32_e32 v4, -1
394+ ; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
395+ ; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
396+ ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
397+ ; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
398+ ; GFX6-NEXT: s_mov_b32 s4, 0
399+ ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
400+ ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
401+ ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
402+ ; GFX6-NEXT: v_mov_b32_e32 v2, s2
403+ ; GFX6-NEXT: s_mov_b32 s5, 0xc1f00000
404+ ; GFX6-NEXT: v_mov_b32_e32 v3, s3
405+ ; GFX6-NEXT: v_fma_f64 v[2:3], v[0:1], s[4:5], v[2:3]
406+ ; GFX6-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
407+ ; GFX6-NEXT: v_cvt_u32_f64_e32 v2, v[2:3]
408+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
409+ ; GFX6-NEXT: s_mov_b32 s4, s0
410+ ; GFX6-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
411+ ; GFX6-NEXT: v_cvt_f64_u32_e32 v[2:3], v2
412+ ; GFX6-NEXT: s_mov_b32 s5, s1
413+ ; GFX6-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
414+ ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
415+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
416+ ; GFX6-NEXT: s_endpgm
417+ ;
418+ ; GFX9-LABEL: fptoui_f64_to_i64_to_f64:
419+ ; GFX9: ; %bb.0: ; %entry
420+ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
421+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0
422+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
423+ ; GFX9-NEXT: v_trunc_f64_e64 v[0:1], |s[2:3]|
424+ ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
425+ ; GFX9-NEXT: s_endpgm
204426entry:
205427 %ui = fptoui double %x to i64
206428 %fp = uitofp i64 %ui to double
0 commit comments