11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
3- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
3+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
44; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
5- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
5+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
66; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
7- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
7+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
88; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
9- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
9+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
1010; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
1111; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
12- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
13- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
12+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
13+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
14+
15+ ; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and
16+ ; moving those SGPRs into VGPRs.
1417
1518define amdgpu_kernel void @sgpr_isnan_f16 (ptr addrspace (1 ) %out , half %x ) {
1619; GFX7SELDAG-LABEL: sgpr_isnan_f16:
@@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
3437; GFX7GLISEL-NEXT: s_mov_b32 s2, -1
3538; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0)
3639; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff
40+ ; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3
3741; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00
38- ; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0
39- ; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000
42+ ; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0
4043; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3
4144; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000
4245; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
4346; GFX7GLISEL-NEXT: s_endpgm
4447;
45- ; GFX8CHECK-LABEL: sgpr_isnan_f16:
46- ; GFX8CHECK: ; %bb.0:
47- ; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
48- ; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
49- ; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0)
50- ; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
51- ; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0
52- ; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3]
53- ; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1
54- ; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2
55- ; GFX8CHECK-NEXT: s_endpgm
56- ;
57- ; GFX9CHECK-LABEL: sgpr_isnan_f16:
58- ; GFX9CHECK: ; %bb.0:
59- ; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
60- ; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
61- ; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0
62- ; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0)
63- ; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
64- ; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
65- ; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1]
66- ; GFX9CHECK-NEXT: s_endpgm
67- ;
68- ; GFX10CHECK-LABEL: sgpr_isnan_f16:
69- ; GFX10CHECK: ; %bb.0:
70- ; GFX10CHECK-NEXT: s_clause 0x1
71- ; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
72- ; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
73- ; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0
74- ; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0)
75- ; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3
76- ; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
77- ; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1]
78- ; GFX10CHECK-NEXT: s_endpgm
48+ ; GFX8SELDAG-LABEL: sgpr_isnan_f16:
49+ ; GFX8SELDAG: ; %bb.0:
50+ ; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
51+ ; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
52+ ; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0)
53+ ; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
54+ ; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0
55+ ; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3]
56+ ; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1
57+ ; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2
58+ ; GFX8SELDAG-NEXT: s_endpgm
59+ ;
60+ ; GFX8GLISEL-LABEL: sgpr_isnan_f16:
61+ ; GFX8GLISEL: ; %bb.0:
62+ ; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
63+ ; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
64+ ; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0)
65+ ; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
66+ ; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
67+ ; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0
68+ ; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1
69+ ; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0
70+ ; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0
71+ ; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0
72+ ; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2
73+ ; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1
74+ ; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2
75+ ; GFX8GLISEL-NEXT: s_endpgm
76+ ;
77+ ; GFX9SELDAG-LABEL: sgpr_isnan_f16:
78+ ; GFX9SELDAG: ; %bb.0:
79+ ; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
80+ ; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
81+ ; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0
82+ ; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0)
83+ ; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
84+ ; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
85+ ; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1]
86+ ; GFX9SELDAG-NEXT: s_endpgm
87+ ;
88+ ; GFX9GLISEL-LABEL: sgpr_isnan_f16:
89+ ; GFX9GLISEL: ; %bb.0:
90+ ; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
91+ ; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
92+ ; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0
93+ ; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0)
94+ ; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
95+ ; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
96+ ; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0
97+ ; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1
98+ ; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0
99+ ; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0
100+ ; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2
101+ ; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1]
102+ ; GFX9GLISEL-NEXT: s_endpgm
103+ ;
104+ ; GFX10SELDAG-LABEL: sgpr_isnan_f16:
105+ ; GFX10SELDAG: ; %bb.0:
106+ ; GFX10SELDAG-NEXT: s_clause 0x1
107+ ; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
108+ ; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
109+ ; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0
110+ ; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0)
111+ ; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s2, s2, 3
112+ ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
113+ ; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1]
114+ ; GFX10SELDAG-NEXT: s_endpgm
115+ ;
116+ ; GFX10GLISEL-LABEL: sgpr_isnan_f16:
117+ ; GFX10GLISEL: ; %bb.0:
118+ ; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
119+ ; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0
120+ ; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0)
121+ ; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s2, s0, 3
122+ ; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
123+ ; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0
124+ ; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0
125+ ; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1
126+ ; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0
127+ ; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0
128+ ; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2
129+ ; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0)
130+ ; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1]
131+ ; GFX10GLISEL-NEXT: s_endpgm
79132;
80133; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
81134; GFX11SELDAG-TRUE16: ; %bb.0:
@@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
103156;
104157; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
105158; GFX11GLISEL-TRUE16: ; %bb.0:
106- ; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1
107- ; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
108- ; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
159+ ; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x2c
109160; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
110161; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
111- ; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l
112- ; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
162+ ; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s0, v0.l
163+ ; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
164+ ; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 vcc_lo, 0
165+ ; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
166+ ; GFX11GLISEL-TRUE16-NEXT: s_and_b32 s2, s2, 1
167+ ; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
168+ ; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
169+ ; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
170+ ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
113171; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
114172; GFX11GLISEL-TRUE16-NEXT: s_endpgm
115173;
116174; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
117175; GFX11GLISEL-FAKE16: ; %bb.0:
118- ; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1
119- ; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
176+ ; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x2c
177+ ; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
178+ ; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
179+ ; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s0, 3
120180; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
121- ; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
181+ ; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
182+ ; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0
183+ ; GFX11GLISEL-FAKE16-NEXT: s_and_b32 s2, s2, 1
184+ ; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
185+ ; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, -1, 0
186+ ; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
122187; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
123- ; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3
124- ; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
125- ; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
188+ ; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1]
126189; GFX11GLISEL-FAKE16-NEXT: s_endpgm
127190 %result = call i1 @llvm.is.fpclass.f16 (half %x , i32 3 )
128191 %sext = sext i1 %result to i32
0 commit comments