Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,8 @@ def : GCNPat <
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
>;

foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let SubtargetPredicate = p in {
foreach fp16vt = [f16, bf16] in {
def : GCNPat <
(UniformUnaryFrag<fneg> (fp16vt SReg_32:$src)),
Expand All @@ -1980,6 +1982,7 @@ def : GCNPat <
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
>;
} // End foreach fp16vt = ...
} // let SubtargetPredicate = p

def : GCNPat <
(UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)),
Expand Down
60 changes: 42 additions & 18 deletions llvm/test/CodeGen/AMDGPU/bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18652,12 +18652,20 @@ define amdgpu_ps i32 @s_fabs_bf16(bfloat inreg %a) {
; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fabs_bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s0, 0x7fff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11-NEXT: ; return to shader part epilog
; GFX11TRUE16-LABEL: s_fabs_bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
; GFX11FAKE16-LABEL: s_fabs_bf16:
; GFX11FAKE16: ; %bb.0:
; GFX11FAKE16-NEXT: s_and_b32 s0, s0, 0x7fff
; GFX11FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11FAKE16-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11FAKE16-NEXT: ; return to shader part epilog
%op = call bfloat @llvm.fabs.bf16(bfloat %a)
%cast = bitcast bfloat %op to i16
%zext = zext i16 %cast to i32
Expand Down Expand Up @@ -18747,12 +18755,20 @@ define amdgpu_ps i32 @s_fneg_bf16(bfloat inreg %a) {
; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fneg_bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_xor_b32 s0, s0, 0x8000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11-NEXT: ; return to shader part epilog
; GFX11TRUE16-LABEL: s_fneg_bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
; GFX11FAKE16-LABEL: s_fneg_bf16:
; GFX11FAKE16: ; %bb.0:
; GFX11FAKE16-NEXT: s_xor_b32 s0, s0, 0x8000
; GFX11FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11FAKE16-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11FAKE16-NEXT: ; return to shader part epilog
%op = fneg bfloat %a
%cast = bitcast bfloat %op to i16
%zext = zext i16 %cast to i32
Expand Down Expand Up @@ -18859,12 +18875,20 @@ define amdgpu_ps i32 @s_fneg_fabs_bf16(bfloat inreg %a) {
; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fneg_fabs_bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_bitset1_b32 s0, 15
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11-NEXT: ; return to shader part epilog
; GFX11TRUE16-LABEL: s_fneg_fabs_bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: v_or_b16 v0.l, 0x8000, s0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
; GFX11FAKE16-LABEL: s_fneg_fabs_bf16:
; GFX11FAKE16: ; %bb.0:
; GFX11FAKE16-NEXT: s_bitset1_b32 s0, 15
; GFX11FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11FAKE16-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11FAKE16-NEXT: ; return to shader part epilog
%fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
%op = fneg bfloat %fabs
%cast = bitcast bfloat %op to i16
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fabs.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ define amdgpu_kernel void @s_fabs_free_f16(ptr addrspace(1) %out, i16 %in) {
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -118,9 +118,9 @@ define amdgpu_kernel void @s_fabs_f16(ptr addrspace(1) %out, half %in) {
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ define amdgpu_kernel void @s_test_copysign_f16_0(ptr addrspace(1) %out, half %ma
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -201,9 +201,9 @@ define amdgpu_kernel void @s_test_copysign_f16_1(ptr addrspace(1) %out, half %ma
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -266,9 +266,9 @@ define amdgpu_kernel void @s_test_copysign_f16_10.0(ptr addrspace(1) %out, half
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -331,9 +331,9 @@ define amdgpu_kernel void @s_test_copysign_f16_neg1(ptr addrspace(1) %out, half
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_bitset1_b32 s2, 15
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -396,9 +396,9 @@ define amdgpu_kernel void @s_test_copysign_f16_neg10(ptr addrspace(1) %out, half
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_bitset1_b32 s2, 15
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -695,12 +695,10 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v0.l, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@ define amdgpu_kernel void @fneg_fabs_free_f16(ptr addrspace(1) %out, i16 %in) {
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_bitset1_b32 s2, 15
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -271,9 +271,9 @@ define amdgpu_kernel void @fneg_fabs_f16(ptr addrspace(1) %out, half %in) {
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_bitset1_b32 s2, 15
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -327,7 +327,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(ptr addrspace(1) %out, ptr addrspace(
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3]
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX11-TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fneg.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ define amdgpu_kernel void @s_fneg_f16(ptr addrspace(1) %out, half %in) #0 {
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_xor_b32 s2, s2, 0x8000
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down Expand Up @@ -190,9 +190,9 @@ define amdgpu_kernel void @s_fneg_free_f16(ptr addrspace(1) %out, i16 %in) #0 {
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_xor_b32 s2, s2, 0x8000
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/CodeGen/AMDGPU/fpext.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -724,12 +724,10 @@ define amdgpu_kernel void @fneg_multi_use_fpext_f16_to_f32(
; GFX11-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s9, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v0, -v0.l
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
Expand Down Expand Up @@ -927,12 +925,10 @@ define amdgpu_kernel void @fabs_multi_use_fpext_f16_to_f32(
; GFX11-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s9, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v0, |v0.l|
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
Expand Down Expand Up @@ -1130,12 +1126,10 @@ define amdgpu_kernel void @fabs_fneg_multi_use_fpext_f16_to_f32(
; GFX11-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s9, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], 0
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, 0x8000, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_or_b16 v1.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v0, -|v0.l|
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 dlc
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
Expand Down