Skip to content

Commit b023cc5

Browse files
piotrAMD authored and mahesh-attarde committed
[AMDGPU][True16][CodeGen] Avoid setting hi part in copysign (llvm#160891)
This is a temporary fix for a regression from llvm#154875. The new pattern sets the hi part of the V_BFI result, and that confuses si-fix-sgpr-copies - where the proper fix is likely to be.

During si-fix-sgpr-copies, an incorrect fold happens:

    %86:vgpr_32 = V_BFI_B32_e64
    %87:sreg_32 = COPY %86.hi16:vgpr_32
    %95:vgpr_32 = nofpexcept V_PACK_B32_F16_t16_e64 0, killed %87:sreg_32, 0, %63:vgpr_16, 0, 0

into

    %86:vgpr_32 = V_BFI_B32_e64
    %95:vgpr_32 = nofpexcept V_PACK_B32_F16_t16_e64 0, %86.lo16:vgpr_32, 0, %63:vgpr_16, 0, 0

Fixes: Vulkan CTS dEQP-VK.glsl.builtin.precision_fp16_storage32b.*.
1 parent d96c493 commit b023cc5

File tree

3 files changed

+67
-94
lines changed

3 files changed

+67
-94
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2287,8 +2287,9 @@ def : GCNPat <
22872287

22882288
def : GCNPat <
22892289
(fcopysign fp16vt:$src0, f32:$src1),
2290-
(EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fff0000)),
2291-
(REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src0, hi16), $src1), hi16)
2290+
(EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)),
2291+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
2292+
(V_LSHRREV_B32_e64 (i32 16), $src1)), lo16)
22922293
>;
22932294

22942295
def : GCNPat <

llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll

Lines changed: 35 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -231,22 +231,13 @@ define bfloat @v_copysign_bf16_f32(bfloat %mag, float %sign.f32) {
231231
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
232232
; GFX10-NEXT: s_setpc_b64 s[30:31]
233233
;
234-
; GFX11TRUE16-LABEL: v_copysign_bf16_f32:
235-
; GFX11TRUE16: ; %bb.0:
236-
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
238-
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
239-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v1
240-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
241-
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
242-
;
243-
; GFX11FAKE16-LABEL: v_copysign_bf16_f32:
244-
; GFX11FAKE16: ; %bb.0:
245-
; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246-
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
247-
; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
248-
; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
249-
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
234+
; GFX11-LABEL: v_copysign_bf16_f32:
235+
; GFX11: ; %bb.0:
236+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237+
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
238+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
239+
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
240+
; GFX11-NEXT: s_setpc_b64 s[30:31]
250241
%sign = fptrunc float %sign.f32 to bfloat
251242
%op = call bfloat @llvm.copysign.bf16(bfloat %mag, bfloat %sign)
252243
ret bfloat %op
@@ -298,22 +289,13 @@ define bfloat @v_copysign_bf16_f64(bfloat %mag, double %sign.f64) {
298289
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
299290
; GFX10-NEXT: s_setpc_b64 s[30:31]
300291
;
301-
; GFX11TRUE16-LABEL: v_copysign_bf16_f64:
302-
; GFX11TRUE16: ; %bb.0:
303-
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
305-
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
306-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v2
307-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
308-
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
309-
;
310-
; GFX11FAKE16-LABEL: v_copysign_bf16_f64:
311-
; GFX11FAKE16: ; %bb.0:
312-
; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313-
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
314-
; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
315-
; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
316-
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
292+
; GFX11-LABEL: v_copysign_bf16_f64:
293+
; GFX11: ; %bb.0:
294+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295+
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
296+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
297+
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
298+
; GFX11-NEXT: s_setpc_b64 s[30:31]
317299
%sign = fptrunc double %sign.f64 to bfloat
318300
%op = call bfloat @llvm.copysign.bf16(bfloat %mag, bfloat %sign)
319301
ret bfloat %op
@@ -499,9 +481,10 @@ define amdgpu_ps i32 @s_copysign_bf16_f32(bfloat inreg %mag, float inreg %sign.f
499481
;
500482
; GFX11TRUE16-LABEL: s_copysign_bf16_f32:
501483
; GFX11TRUE16: ; %bb.0:
502-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
484+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
485+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s1
503486
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
504-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
487+
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
505488
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
506489
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
507490
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
@@ -575,9 +558,10 @@ define amdgpu_ps i32 @s_copysign_bf16_f64(bfloat inreg %mag, double inreg %sign.
575558
;
576559
; GFX11TRUE16-LABEL: s_copysign_bf16_f64:
577560
; GFX11TRUE16: ; %bb.0:
578-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
561+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
562+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s2
579563
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
580-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
564+
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
581565
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
582566
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
583567
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
@@ -3677,9 +3661,10 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_bf16_sign_f32(bfloat inreg %mag, f
36773661
;
36783662
; GFX11TRUE16-LABEL: s_copysign_out_bf16_mag_bf16_sign_f32:
36793663
; GFX11TRUE16: ; %bb.0:
3680-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
3664+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
3665+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s1
36813666
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3682-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
3667+
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
36833668
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
36843669
; GFX11TRUE16-NEXT: ; return to shader part epilog
36853670
;
@@ -3744,9 +3729,10 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_bf16_sign_f64(bfloat inreg %mag, d
37443729
;
37453730
; GFX11TRUE16-LABEL: s_copysign_out_bf16_mag_bf16_sign_f64:
37463731
; GFX11TRUE16: ; %bb.0:
3747-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
3732+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
3733+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s2
37483734
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3749-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
3735+
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
37503736
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
37513737
; GFX11TRUE16-NEXT: ; return to shader part epilog
37523738
;
@@ -6700,15 +6686,16 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64(<3 x bfloat> %m
67006686
; GFX11TRUE16-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:
67016687
; GFX11TRUE16: ; %bb.0:
67026688
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6703-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
6704-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
6705-
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v5
6689+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
6690+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v5
6691+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6692+
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6693+
; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v4
6694+
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v7
6695+
; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v3
67066696
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6707-
; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff0000, v1, v3
6708-
; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fff0000, v2, v7
6709-
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6710-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
6711-
; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
6697+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
6698+
; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff, v1, v4
67126699
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
67136700
;
67146701
; GFX11FAKE16-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:

llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll

Lines changed: 29 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -776,22 +776,13 @@ define half @v_copysign_out_f16_mag_f16_sign_f32(half %mag, float %sign) {
776776
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
777777
; GFX9-NEXT: s_setpc_b64 s[30:31]
778778
;
779-
; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
780-
; GFX11-TRUE16: ; %bb.0:
781-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
782-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
783-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
784-
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v1
785-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
786-
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
787-
;
788-
; GFX11-FAKE16-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
789-
; GFX11-FAKE16: ; %bb.0:
790-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
791-
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
792-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
793-
; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
794-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
779+
; GFX11-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
780+
; GFX11: ; %bb.0:
781+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
782+
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
783+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
784+
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
785+
; GFX11-NEXT: s_setpc_b64 s[30:31]
795786
%sign.trunc = fptrunc float %sign to half
796787
%out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
797788
ret half %out
@@ -823,22 +814,13 @@ define half @v_copysign_out_f16_mag_f16_sign_f64(half %mag, double %sign) {
823814
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
824815
; GFX9-NEXT: s_setpc_b64 s[30:31]
825816
;
826-
; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
827-
; GFX11-TRUE16: ; %bb.0:
828-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
830-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
831-
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v2
832-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
833-
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
834-
;
835-
; GFX11-FAKE16-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
836-
; GFX11-FAKE16: ; %bb.0:
837-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838-
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
839-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
840-
; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
841-
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
817+
; GFX11-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
818+
; GFX11: ; %bb.0:
819+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820+
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
821+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
822+
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
823+
; GFX11-NEXT: s_setpc_b64 s[30:31]
842824
%sign.trunc = fptrunc double %sign to half
843825
%out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
844826
ret half %out
@@ -2832,9 +2814,10 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f16_sign_f32(half inreg %mag, float
28322814
;
28332815
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f16_sign_f32:
28342816
; GFX11-TRUE16: ; %bb.0:
2835-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
2817+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
2818+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s1
28362819
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2837-
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
2820+
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
28382821
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
28392822
; GFX11-TRUE16-NEXT: ; return to shader part epilog
28402823
;
@@ -2883,9 +2866,10 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f16_sign_f64(half inreg %mag, doubl
28832866
;
28842867
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f16_sign_f64:
28852868
; GFX11-TRUE16: ; %bb.0:
2886-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
2869+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
2870+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s2
28872871
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2888-
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
2872+
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
28892873
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
28902874
; GFX11-TRUE16-NEXT: ; return to shader part epilog
28912875
;
@@ -5590,15 +5574,16 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f16_sign_v3f64(<3 x half> %mag, <3
55905574
; GFX11-TRUE16-LABEL: v_copysign_out_v3f16_mag_v3f16_sign_v3f64:
55915575
; GFX11-TRUE16: ; %bb.0:
55925576
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5593-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
5594-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
5595-
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v5
5577+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
5578+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v5
5579+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
5580+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5581+
; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v4
5582+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v7
5583+
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v3
55965584
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5597-
; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff0000, v1, v3
5598-
; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff0000, v2, v7
5599-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5600-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
5601-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
5585+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
5586+
; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff, v1, v4
56025587
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
56035588
;
56045589
; GFX11-FAKE16-LABEL: v_copysign_out_v3f16_mag_v3f16_sign_v3f64:

0 commit comments

Comments
 (0)