Skip to content

Commit 67a92a4

Browse files
authored
[amd-staging] reenable true16 mode for gfx11 (llvm#3726)
2 parents a33e404 + e1e368f commit 67a92a4

File tree

7 files changed: +20 -18 lines changed

7 files changed: +20 -18 lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1920,7 +1920,8 @@ def FeatureISAVersion11_Common : FeatureSet<
19201920
FeatureImageInsts,
19211921
FeaturePackedTID,
19221922
FeatureVcmpxPermlaneHazard,
1923-
FeatureMemoryAtomicFAddF32DenormalSupport]>;
1923+
FeatureMemoryAtomicFAddF32DenormalSupport,
1924+
FeatureRealTrue16Insts]>;
19241925

19251926
// There are few workarounds that need to be
19261927
// added to all targets. This pessimizes codegen
@@ -1940,8 +1941,7 @@ def FeatureISAVersion11_0_Common : FeatureSet<
19401941
[FeatureMSAALoadDstSelBug,
19411942
FeatureVALUTransUseHazard,
19421943
FeatureMADIntraFwdBug,
1943-
FeaturePrivEnabledTrap2NopBug
1944-
])>;
1944+
FeaturePrivEnabledTrap2NopBug])>;
19451945

19461946
def FeatureISAVersion11_0_0 : FeatureSet<
19471947
!listconcat(FeatureISAVersion11_0_Common.Features,

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6534,9 +6534,9 @@ define void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
65346534
; GFX11-LABEL: insert_very_small_from_very_large:
65356535
; GFX11: ; %bb.0: ; %bb
65366536
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6537-
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6538-
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
6539-
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
6537+
; GFX11-NEXT: v_lshrrev_b16 v0.l, 1, v0.l
6538+
; GFX11-NEXT: v_and_b16 v0.l, v0.l, 1
6539+
; GFX11-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
65406540
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
65416541
; GFX11-NEXT: flat_store_b8 v[16:17], v0
65426542
; GFX11-NEXT: s_waitcnt lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
33
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
44
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
5-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
6-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s
5+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=-real-true16 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
6+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s
77

88
---
99
name: test_fdiv_s16

llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2549,12 +2549,13 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16
25492549
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25502550
; GFX11-SDAG-NEXT: s_mov_b32 s4, s33
25512551
; GFX11-SDAG-NEXT: s_mov_b32 s33, s32
2552-
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
2552+
; GFX11-SDAG-NEXT: v_mov_b16_e32 v1.h, 0
2553+
; GFX11-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l
25532554
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
25542555
; GFX11-SDAG-NEXT: s_mov_b32 s0, 0
25552556
; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16
25562557
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2557-
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
2558+
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15
25582559
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0
25592560
; GFX11-SDAG-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1
25602561
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1

llvm/test/CodeGen/AMDGPU/fptrunc.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -620,7 +620,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(ptr addrspace(1) %out, double
620620
; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
621621
; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
622622
; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
623-
; GFX11-SAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
623+
; GFX11-SAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0.l, v0
624624
; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
625625
; GFX11-SAFE-SDAG-NEXT: s_endpgm
626626
;
@@ -632,7 +632,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(ptr addrspace(1) %out, double
632632
; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
633633
; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
634634
; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
635-
; GFX11-SAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
635+
; GFX11-SAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0.l, v0
636636
; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
637637
; GFX11-SAFE-GISEL-NEXT: s_endpgm
638638
;

llvm/test/CodeGen/AMDGPU/uaddo.ll

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -739,7 +739,7 @@ define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
739739
; GFX11-NEXT: v_mov_b32_e32 v0, 0
740740
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
741741
; GFX11-NEXT: s_clause 0x1
742-
; GFX11-NEXT: global_load_u16 v1, v0, s[4:5]
742+
; GFX11-NEXT: global_load_d16_b16 v1, v0, s[4:5]
743743
; GFX11-NEXT: global_load_u16 v2, v0, s[6:7]
744744
; GFX11-NEXT: s_waitcnt vmcnt(0)
745745
; GFX11-NEXT: v_add_nc_u32_e32 v2, v1, v2
@@ -1242,14 +1242,15 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
12421242
; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo
12431243
; GFX11-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s2, v4, vcc_lo
12441244
; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s3, v5, vcc_lo
1245-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1246-
; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[2:3]
1247-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
1245+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
12481246
; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[4:5]
1247+
; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
1248+
; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[2:3]
1249+
; GFX11-NEXT: v_mov_b16_e32 v2.l, v6.l
12491250
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
12501251
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
12511252
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1252-
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
1253+
; GFX11-NEXT: v_cndmask_b16 v2.l, v2.l, v3.l, vcc_lo
12531254
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
12541255
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
12551256
; GFX11-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/usubo.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -738,7 +738,7 @@ define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
738738
; GFX11-NEXT: v_mov_b32_e32 v0, 0
739739
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
740740
; GFX11-NEXT: s_clause 0x1
741-
; GFX11-NEXT: global_load_u16 v1, v0, s[4:5]
741+
; GFX11-NEXT: global_load_d16_b16 v1, v0, s[4:5]
742742
; GFX11-NEXT: global_load_u16 v2, v0, s[6:7]
743743
; GFX11-NEXT: s_waitcnt vmcnt(0)
744744
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v1, v2

0 commit comments

Comments
 (0)