Skip to content

Commit cb58129

Browse files
authored
[AMDGPU][GlobalISel] Add RegBankLegalize support for G_IS_FPCLASS (#167575)
1 parent 727ee7e commit cb58129

File tree

3 files changed

+347
-142
lines changed

3 files changed

+347
-142
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
960960
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
961961
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
962962

963+
addRulesForGOpcs({G_IS_FPCLASS})
964+
.Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
965+
.Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
966+
.Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
967+
.Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
968+
.Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
969+
.Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
970+
963971
using namespace Intrinsic;
964972

965973
addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll

Lines changed: 116 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
3+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
44
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
5-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
5+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
66
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
7-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
7+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
88
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
9-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
9+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
1010
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
1111
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
12-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
13-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
12+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
13+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
14+
15+
; FIXME: GlobalISel has code size regressions here because values are kept in
16+
; SGPRs and then have to be copied into VGPRs.
1417

1518
define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
1619
; GFX7SELDAG-LABEL: sgpr_isnan_f16:
@@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
3437
; GFX7GLISEL-NEXT: s_mov_b32 s2, -1
3538
; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0)
3639
; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff
40+
; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3
3741
; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00
38-
; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0
39-
; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000
42+
; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0
4043
; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3
4144
; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000
4245
; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
4346
; GFX7GLISEL-NEXT: s_endpgm
4447
;
45-
; GFX8CHECK-LABEL: sgpr_isnan_f16:
46-
; GFX8CHECK: ; %bb.0:
47-
; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
48-
; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
49-
; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0)
50-
; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
51-
; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0
52-
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3]
53-
; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1
54-
; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2
55-
; GFX8CHECK-NEXT: s_endpgm
56-
;
57-
; GFX9CHECK-LABEL: sgpr_isnan_f16:
58-
; GFX9CHECK: ; %bb.0:
59-
; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
60-
; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
61-
; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0
62-
; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0)
63-
; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
64-
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
65-
; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1]
66-
; GFX9CHECK-NEXT: s_endpgm
67-
;
68-
; GFX10CHECK-LABEL: sgpr_isnan_f16:
69-
; GFX10CHECK: ; %bb.0:
70-
; GFX10CHECK-NEXT: s_clause 0x1
71-
; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
72-
; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
73-
; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0
74-
; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0)
75-
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3
76-
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
77-
; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1]
78-
; GFX10CHECK-NEXT: s_endpgm
48+
; GFX8SELDAG-LABEL: sgpr_isnan_f16:
49+
; GFX8SELDAG: ; %bb.0:
50+
; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
51+
; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
52+
; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0)
53+
; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
54+
; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0
55+
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3]
56+
; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1
57+
; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2
58+
; GFX8SELDAG-NEXT: s_endpgm
59+
;
60+
; GFX8GLISEL-LABEL: sgpr_isnan_f16:
61+
; GFX8GLISEL: ; %bb.0:
62+
; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
63+
; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
64+
; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0)
65+
; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
66+
; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
67+
; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0
68+
; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1
69+
; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0
70+
; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0
71+
; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0
72+
; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2
73+
; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1
74+
; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2
75+
; GFX8GLISEL-NEXT: s_endpgm
76+
;
77+
; GFX9SELDAG-LABEL: sgpr_isnan_f16:
78+
; GFX9SELDAG: ; %bb.0:
79+
; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
80+
; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
81+
; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0
82+
; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0)
83+
; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
84+
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
85+
; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1]
86+
; GFX9SELDAG-NEXT: s_endpgm
87+
;
88+
; GFX9GLISEL-LABEL: sgpr_isnan_f16:
89+
; GFX9GLISEL: ; %bb.0:
90+
; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
91+
; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
92+
; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0
93+
; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0)
94+
; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
95+
; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
96+
; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0
97+
; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1
98+
; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0
99+
; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0
100+
; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2
101+
; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1]
102+
; GFX9GLISEL-NEXT: s_endpgm
103+
;
104+
; GFX10SELDAG-LABEL: sgpr_isnan_f16:
105+
; GFX10SELDAG: ; %bb.0:
106+
; GFX10SELDAG-NEXT: s_clause 0x1
107+
; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
108+
; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
109+
; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0
110+
; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0)
111+
; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s2, s2, 3
112+
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
113+
; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1]
114+
; GFX10SELDAG-NEXT: s_endpgm
115+
;
116+
; GFX10GLISEL-LABEL: sgpr_isnan_f16:
117+
; GFX10GLISEL: ; %bb.0:
118+
; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
119+
; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0
120+
; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0)
121+
; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s2, s0, 3
122+
; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
123+
; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0
124+
; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0
125+
; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1
126+
; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0
127+
; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0
128+
; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2
129+
; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0)
130+
; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1]
131+
; GFX10GLISEL-NEXT: s_endpgm
79132
;
80133
; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
81134
; GFX11SELDAG-TRUE16: ; %bb.0:
@@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
103156
;
104157
; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
105158
; GFX11GLISEL-TRUE16: ; %bb.0:
106-
; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1
107-
; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
108-
; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
159+
; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x2c
109160
; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
110161
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
111-
; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l
112-
; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
162+
; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s0, v0.l
163+
; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
164+
; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 vcc_lo, 0
165+
; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
166+
; GFX11GLISEL-TRUE16-NEXT: s_and_b32 s2, s2, 1
167+
; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
168+
; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
169+
; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
170+
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
113171
; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
114172
; GFX11GLISEL-TRUE16-NEXT: s_endpgm
115173
;
116174
; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
117175
; GFX11GLISEL-FAKE16: ; %bb.0:
118-
; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1
119-
; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
176+
; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x2c
177+
; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
178+
; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
179+
; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s0, 3
120180
; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
121-
; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
181+
; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
182+
; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0
183+
; GFX11GLISEL-FAKE16-NEXT: s_and_b32 s2, s2, 1
184+
; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
185+
; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, -1, 0
186+
; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
122187
; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
123-
; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3
124-
; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
125-
; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
188+
; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1]
126189
; GFX11GLISEL-FAKE16-NEXT: s_endpgm
127190
%result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
128191
%sext = sext i1 %result to i32

0 commit comments

Comments
 (0)