Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

addRulesForGOpcs({G_IS_FPCLASS})
.Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
.Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
.Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
.Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
.Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
.Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});

using namespace Intrinsic;

addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
Expand Down
166 changes: 113 additions & 53 deletions llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s

define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
; GFX7SELDAG-LABEL: sgpr_isnan_f16:
Expand All @@ -34,48 +34,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
; GFX7GLISEL-NEXT: s_mov_b32 s2, -1
; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff
; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3
; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00
; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0
; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000
; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3
; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000
; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7GLISEL-NEXT: s_endpgm
;
; GFX8CHECK-LABEL: sgpr_isnan_f16:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0)
; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3]
; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1
; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2
; GFX8CHECK-NEXT: s_endpgm
;
; GFX9CHECK-LABEL: sgpr_isnan_f16:
; GFX9CHECK: ; %bb.0:
; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0
; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0)
; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9CHECK-NEXT: s_endpgm
;
; GFX10CHECK-LABEL: sgpr_isnan_f16:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_clause 0x1
; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0
; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0)
; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10CHECK-NEXT: s_endpgm
; GFX8SELDAG-LABEL: sgpr_isnan_f16:
; GFX8SELDAG: ; %bb.0:
; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3]
; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2
; GFX8SELDAG-NEXT: s_endpgm
;
; GFX8GLISEL-LABEL: sgpr_isnan_f16:
; GFX8GLISEL: ; %bb.0:
; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0
; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1
; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0
; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GLISEL-NEXT: s_endpgm
;
; GFX9SELDAG-LABEL: sgpr_isnan_f16:
; GFX9SELDAG: ; %bb.0:
; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9SELDAG-NEXT: s_endpgm
;
; GFX9GLISEL-LABEL: sgpr_isnan_f16:
; GFX9GLISEL: ; %bb.0:
; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3
; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0
; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1
; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0
; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GLISEL-NEXT: s_endpgm
;
; GFX10SELDAG-LABEL: sgpr_isnan_f16:
; GFX10SELDAG: ; %bb.0:
; GFX10SELDAG-NEXT: s_clause 0x1
; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s2, s2, 3
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10SELDAG-NEXT: s_endpgm
;
; GFX10GLISEL-LABEL: sgpr_isnan_f16:
; GFX10GLISEL: ; %bb.0:
; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s2, s0, 3
; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0
; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1
; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0
; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10GLISEL-NEXT: s_endpgm
;
; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
; GFX11SELDAG-TRUE16: ; %bb.0:
Expand Down Expand Up @@ -103,26 +153,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
;
; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
; GFX11GLISEL-TRUE16: ; %bb.0:
; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1
; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l
; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s0, v0.l
; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 vcc_lo, 0
; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
; GFX11GLISEL-TRUE16-NEXT: s_and_b32 s2, s2, 1
; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11GLISEL-TRUE16-NEXT: s_endpgm
;
; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
; GFX11GLISEL-FAKE16: ; %bb.0:
; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1
; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s0, 3
; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0
; GFX11GLISEL-FAKE16-NEXT: s_and_b32 s2, s2, 1
; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3
; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2
; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11GLISEL-FAKE16-NEXT: s_endpgm
%result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
%sext = sext i1 %result to i32
Expand Down
Loading
Loading