@@ -4030,7 +4030,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
40304030; VI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
40314031; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v3
40324032; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
4033- ; VI-NEXT: v_mov_b32_e32 v2, 0x8000
4033+ ; VI-NEXT: v_mov_b32_e32 v2, 0xffff8000
40344034; VI-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
40354035; VI-NEXT: v_or_b32_e32 v0, v1, v0
40364036; VI-NEXT: s_setpc_b64 s[30:31]
@@ -4045,19 +4045,22 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
40454045; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11
40464046; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
40474047; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
4048- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1 ) | instid1(VALU_DEP_1 )
4048+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT ) | instid1(VALU_DEP_3 )
40494049; GFX11-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
4050+ ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff8000, v1
40504051; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
4052+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
40514053; GFX11-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
4052- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
40534054; GFX11-NEXT: v_med3_i32 v2, v4, 0, 13
4055+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
40544056; GFX11-NEXT: v_or_b32_e32 v4, 0x1000, v0
40554057; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
40564058; GFX11-NEXT: v_lshrrev_b32_e32 v5, v2, v4
4059+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
40574060; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v5
4058- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
40594061; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
40604062; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
4063+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
40614064; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
40624065; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
40634066; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -4078,11 +4081,12 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
40784081; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
40794082; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
40804083; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v3
4084+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
40814085; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
40824086; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
4083- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
40844087; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
4085- ; GFX11-NEXT: v_and_or_b32 v0, 0x8000, v1, v0
4088+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4089+ ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
40864090; GFX11-NEXT: s_setpc_b64 s[30:31]
40874091 %fpround = fptrunc double %a to half
40884092 %fneg = fneg half %fpround
@@ -4180,7 +4184,7 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
41804184; VI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
41814185; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v3
41824186; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
4183- ; VI-NEXT: v_mov_b32_e32 v2, 0x8000
4187+ ; VI-NEXT: v_mov_b32_e32 v2, 0xffff8000
41844188; VI-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
41854189; VI-NEXT: v_or_b32_e32 v0, v1, v0
41864190; VI-NEXT: s_setpc_b64 s[30:31]
@@ -4195,20 +4199,22 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
41954199; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
41964200; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
41974201; GFX11-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
4202+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4203+ ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff8000, v1
41984204; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
4199- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
42004205; GFX11-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
4206+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
42014207; GFX11-NEXT: v_med3_i32 v2, v4, 0, 13
42024208; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
42034209; GFX11-NEXT: v_or_b32_e32 v4, 0x1000, v0
42044210; GFX11-NEXT: v_lshrrev_b32_e32 v5, v2, v4
4205- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
42064211; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v5
4212+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
42074213; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
42084214; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
4209- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
42104215; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
42114216; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
4217+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
42124218; GFX11-NEXT: v_lshl_or_b32 v4, v3, 12, v0
42134219; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v3
42144220; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -4229,9 +4235,9 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
42294235; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
42304236; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
42314237; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
4238+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
42324239; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
4233- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4234- ; GFX11-NEXT: v_and_or_b32 v0, 0x8000, v1, v0
4240+ ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
42354241; GFX11-NEXT: s_setpc_b64 s[30:31]
42364242 %fneg.a = fneg double %a
42374243 %fpround = fptrunc double %fneg.a to half
@@ -4336,7 +4342,7 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
43364342; VI-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
43374343; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v5
43384344; VI-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
4339- ; VI-NEXT: v_mov_b32_e32 v4, 0x8000
4345+ ; VI-NEXT: v_mov_b32_e32 v4, 0xffff8000
43404346; VI-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
43414347; VI-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
43424348; VI-NEXT: v_or_b32_e32 v0, v1, v0
@@ -4599,7 +4605,7 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
45994605; VI-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc
46004606; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v6
46014607; VI-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
4602- ; VI-NEXT: v_mov_b32_e32 v4, 0x8000
4608+ ; VI-NEXT: v_mov_b32_e32 v4, 0xffff8000
46034609; VI-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
46044610; VI-NEXT: v_or_b32_e32 v0, v1, v0
46054611; VI-NEXT: v_mov_b32_e32 v1, v2
@@ -4757,7 +4763,7 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
47574763; VI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
47584764; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v3
47594765; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
4760- ; VI-NEXT: v_mov_b32_e32 v2, 0x8000
4766+ ; VI-NEXT: v_mov_b32_e32 v2, 0xffff8000
47614767; VI-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
47624768; VI-NEXT: v_or_b32_e32 v1, v1, v0
47634769; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v1
@@ -4807,9 +4813,9 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
48074813; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
48084814; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
48094815; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
4810- ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
4816+ ; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_and_b32 v1, 0xffff8000, v1
48114817; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4812- ; GFX11-NEXT: v_and_or_b32 v1, 0x8000 , v1, v0
4818+ ; GFX11-NEXT: v_or_b32_e32 v1, v1, v0
48134819; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
48144820; GFX11-NEXT: s_setpc_b64 s[30:31]
48154821; GFX11-SAFE-TRUE16-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
0 commit comments