@@ -4387,28 +4387,28 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
43874387}
43884388
43894389define <2 x half > @select_fneg_posk_src_sub_v2f16 (<2 x i32 > %c , <2 x half > %x ) {
4390- ; CI-SAFE- LABEL: select_fneg_posk_src_sub_v2f16:
4391- ; CI-SAFE : ; %bb.0:
4392- ; CI-SAFE- NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4393- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v3, v3
4394- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v2, v2
4395- ; CI-SAFE- NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4396- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v3, v3
4397- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v2, v2
4398- ; CI-SAFE- NEXT: v_add_f32_e32 v3, -4.0, v3
4399- ; CI-SAFE- NEXT: v_add_f32_e32 v2, -4.0, v2
4400- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v3, v3
4401- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v2, v2
4402- ; CI-SAFE- NEXT: v_lshlrev_b32_e32 v3, 16, v3
4403- ; CI-SAFE- NEXT: v_or_b32_e32 v2, v2, v3
4404- ; CI-SAFE- NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4405- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v3, v2
4406- ; CI-SAFE- NEXT: v_lshrrev_b32_e32 v2, 16, v2
4407- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v2, v2
4408- ; CI-SAFE- NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4409- ; CI-SAFE- NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4410- ; CI-SAFE- NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4411- ; CI-SAFE- NEXT: s_setpc_b64 s[30:31]
4390+ ; CI-LABEL: select_fneg_posk_src_sub_v2f16:
4391+ ; CI: ; %bb.0:
4392+ ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4393+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4394+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4395+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4396+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4397+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4398+ ; CI-NEXT: v_add_f32_e32 v3, -4.0, v3
4399+ ; CI-NEXT: v_add_f32_e32 v2, -4.0, v2
4400+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4401+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4402+ ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4403+ ; CI-NEXT: v_or_b32_e32 v2, v2, v3
4404+ ; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4405+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
4406+ ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4407+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4408+ ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4409+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4410+ ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4411+ ; CI-NEXT: s_setpc_b64 s[30:31]
44124412;
44134413; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
44144414; VI-SAFE: ; %bb.0:
@@ -4468,21 +4468,6 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
44684468; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
44694469; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
44704470;
4471- ; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
4472- ; CI-NSZ: ; %bb.0:
4473- ; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4474- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
4475- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
4476- ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4477- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
4478- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
4479- ; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2
4480- ; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3
4481- ; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4482- ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4483- ; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4484- ; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
4485- ;
44864471; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
44874472; VI-NSZ: ; %bb.0:
44884473; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4541,6 +4526,105 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
45414526 ret <2 x half > %select
45424527}
45434528
; Test: per-lane select between fneg(x - 4.0) and the constant 2.0 on <2 x half>,
; choosing the negated value in lanes where %c == 0.
; Only the fneg instruction carries the nsz flag; the fsub is written without
; fast-math flags.
; What the expected output below shows:
;   - CI:  the negation is folded into the subtract (v_sub_f32 4.0, x per lane,
;          computed in f32 after f16<->f32 conversions); no separate sign-bit xor
;          is expected.
;   - VI:  v_sub_f16 with 4.0 as the first operand (0x4400 materialized for the
;          SDWA high-half form); 0x4000 is the select's other operand
;          (presumably the f16 encodings of 4.0 and 2.0 -- matches the IR constants).
;   - GFX9/GFX11: a single v_pk_add_f16 x, 4.0 with neg_lo/neg_hi set on the
;          first source.
;   - The GFX11-SAFE-* and GFX11-NSZ-* expectations are identical for this test.
; NOTE(review): the check lines look tool-generated (see the autogenerated NOTE
; at the end of the file); presumably they should be regenerated rather than
; edited by hand -- confirm against the file header.
4529+ define <2 x half > @select_fneg_posk_src_sub_v2f16_nsz (<2 x i32 > %c , <2 x half > %x ) {
4530+ ; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4531+ ; CI: ; %bb.0:
4532+ ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4533+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4534+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4535+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4536+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4537+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4538+ ; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2
4539+ ; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3
4540+ ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4541+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4542+ ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4543+ ; CI-NEXT: s_setpc_b64 s[30:31]
4544+ ;
4545+ ; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4546+ ; VI: ; %bb.0:
4547+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4548+ ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4549+ ; VI-NEXT: v_mov_b32_e32 v1, 0x4400
4550+ ; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4551+ ; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2
4552+ ; VI-NEXT: v_mov_b32_e32 v3, 0x4000
4553+ ; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4554+ ; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
4555+ ; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
4556+ ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4557+ ; VI-NEXT: s_setpc_b64 s[30:31]
4558+ ;
4559+ ; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4560+ ; GFX9: ; %bb.0:
4561+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4562+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4563+ ; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4564+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
4565+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4566+ ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
4567+ ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4568+ ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
4569+ ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
4570+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
4571+ ;
4572+ ; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4573+ ; GFX11-SAFE-TRUE16: ; %bb.0:
4574+ ; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4575+ ; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4576+ ; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4577+ ; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4578+ ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4579+ ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4580+ ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4581+ ; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
4582+ ;
4583+ ; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4584+ ; GFX11-SAFE-FAKE16: ; %bb.0:
4585+ ; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4586+ ; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4587+ ; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4588+ ; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
4589+ ; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4590+ ; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
4591+ ; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
4592+ ; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
4593+ ; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4594+ ; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
4595+ ; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
4596+ ;
4597+ ; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4598+ ; GFX11-NSZ-TRUE16: ; %bb.0:
4599+ ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4600+ ; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4601+ ; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4602+ ; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4603+ ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4604+ ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4605+ ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4606+ ; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
4607+ ;
4608+ ; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
4609+ ; GFX11-NSZ-FAKE16: ; %bb.0:
4610+ ; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4611+ ; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4612+ ; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4613+ ; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
4614+ ; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4615+ ; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
4616+ ; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
4617+ ; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
4618+ ; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4619+ ; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
4620+ ; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
4621+ %cmp = icmp eq <2 x i32 > %c , zeroinitializer
4622+ %add = fsub <2 x half > %x , <half 4 .0 , half 4 .0 >
4623+ %fneg = fneg nsz <2 x half > %add
4624+ %select = select <2 x i1 > %cmp , <2 x half > %fneg , <2 x half > <half 2 .0 , half 2 .0 >
4625+ ret <2 x half > %select
4626+ }
4627+
45444628define <2 x half > @select_fneg_posk_src_mul_v2f16 (<2 x i32 > %c , <2 x half > %x ) {
45454629; CI-LABEL: select_fneg_posk_src_mul_v2f16:
45464630; CI: ; %bb.0:
@@ -5048,6 +5132,8 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
50485132
50495133attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
50505134;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
5135+ ; CI-NSZ: {{.*}}
5136+ ; CI-SAFE: {{.*}}
50515137; GFX11: {{.*}}
50525138; GFX11-NSZ: {{.*}}
50535139; GFX11-SAFE: {{.*}}
0 commit comments