@@ -4148,28 +4148,28 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
41484148; --------------------------------------------------------------------------------
41494149
41504150define <2 x half > @select_fneg_posk_src_add_v2f16 (<2 x i32 > %c , <2 x half > %x , <2 x half > %y ) {
4151- ; CI-SAFE- LABEL: select_fneg_posk_src_add_v2f16:
4152- ; CI-SAFE : ; %bb.0:
4153- ; CI-SAFE- NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4154- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v3, v3
4155- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v2, v2
4156- ; CI-SAFE- NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4157- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v3, v3
4158- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v2, v2
4159- ; CI-SAFE- NEXT: v_add_f32_e32 v3, 4.0, v3
4160- ; CI-SAFE- NEXT: v_add_f32_e32 v2, 4.0, v2
4161- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v3, v3
4162- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v2, v2
4163- ; CI-SAFE- NEXT: v_lshlrev_b32_e32 v3, 16, v3
4164- ; CI-SAFE- NEXT: v_or_b32_e32 v2, v2, v3
4165- ; CI-SAFE- NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4166- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v3, v2
4167- ; CI-SAFE- NEXT: v_lshrrev_b32_e32 v2, 16, v2
4168- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v2, v2
4169- ; CI-SAFE- NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4170- ; CI-SAFE- NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4171- ; CI-SAFE- NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4172- ; CI-SAFE- NEXT: s_setpc_b64 s[30:31]
4151+ ; CI-LABEL: select_fneg_posk_src_add_v2f16:
4152+ ; CI: ; %bb.0:
4153+ ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4154+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4155+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4156+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4157+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4158+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4159+ ; CI-NEXT: v_add_f32_e32 v3, 4.0, v3
4160+ ; CI-NEXT: v_add_f32_e32 v2, 4.0, v2
4161+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4162+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4163+ ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4164+ ; CI-NEXT: v_or_b32_e32 v2, v2, v3
4165+ ; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4166+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
4167+ ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4168+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4169+ ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4170+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4171+ ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4172+ ; CI-NEXT: s_setpc_b64 s[30:31]
41734173;
41744174; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
41754175; VI-SAFE: ; %bb.0:
@@ -4229,21 +4229,6 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
42294229; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
42304230; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
42314231;
4232- ; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
4233- ; CI-NSZ: ; %bb.0:
4234- ; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4235- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
4236- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
4237- ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4238- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
4239- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
4240- ; CI-NSZ-NEXT: v_sub_f32_e32 v2, -4.0, v2
4241- ; CI-NSZ-NEXT: v_sub_f32_e32 v3, -4.0, v3
4242- ; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4243- ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4244- ; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4245- ; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
4246- ;
42474232; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
42484233; VI-NSZ: ; %bb.0:
42494234; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4302,6 +4287,105 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
43024287 ret <2 x half > %select
43034288}
43044289
4290+ define <2 x half > @select_fneg_posk_src_add_v2f16_nsz (<2 x i32 > %c , <2 x half > %x , <2 x half > %y ) {
4291+ ; CI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4292+ ; CI: ; %bb.0:
4293+ ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4294+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4295+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4296+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4297+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4298+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4299+ ; CI-NEXT: v_sub_f32_e32 v2, -4.0, v2
4300+ ; CI-NEXT: v_sub_f32_e32 v3, -4.0, v3
4301+ ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4302+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4303+ ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4304+ ; CI-NEXT: s_setpc_b64 s[30:31]
4305+ ;
4306+ ; VI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4307+ ; VI: ; %bb.0:
4308+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4309+ ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4310+ ; VI-NEXT: v_mov_b32_e32 v1, 0xc400
4311+ ; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4312+ ; VI-NEXT: v_sub_f16_e32 v2, -4.0, v2
4313+ ; VI-NEXT: v_mov_b32_e32 v3, 0x4000
4314+ ; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4315+ ; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
4316+ ; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
4317+ ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4318+ ; VI-NEXT: s_setpc_b64 s[30:31]
4319+ ;
4320+ ; GFX9-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4321+ ; GFX9: ; %bb.0:
4322+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4323+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4324+ ; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4325+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
4326+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4327+ ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
4328+ ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4329+ ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
4330+ ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
4331+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
4332+ ;
4333+ ; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4334+ ; GFX11-SAFE-TRUE16: ; %bb.0:
4335+ ; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4336+ ; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4337+ ; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4338+ ; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4339+ ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4340+ ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4341+ ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4342+ ; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
4343+ ;
4344+ ; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4345+ ; GFX11-SAFE-FAKE16: ; %bb.0:
4346+ ; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4347+ ; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4348+ ; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4349+ ; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
4350+ ; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4351+ ; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
4352+ ; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
4353+ ; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
4354+ ; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4355+ ; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
4356+ ; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
4357+ ;
4358+ ; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4359+ ; GFX11-NSZ-TRUE16: ; %bb.0:
4360+ ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4361+ ; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4362+ ; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4363+ ; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4364+ ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4365+ ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4366+ ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4367+ ; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
4368+ ;
4369+ ; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4370+ ; GFX11-NSZ-FAKE16: ; %bb.0:
4371+ ; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4372+ ; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4373+ ; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4374+ ; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
4375+ ; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4376+ ; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
4377+ ; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
4378+ ; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
4379+ ; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4380+ ; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
4381+ ; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
4382+ %cmp = icmp eq <2 x i32 > %c , zeroinitializer
4383+ %add = fadd nsz <2 x half > %x , <half 4 .0 , half 4 .0 >
4384+ %fneg = fneg <2 x half > %add
4385+ %select = select <2 x i1 > %cmp , <2 x half > %fneg , <2 x half > <half 2 .0 , half 2 .0 >
4386+ ret <2 x half > %select
4387+ }
4388+
43054389define <2 x half > @select_fneg_posk_src_sub_v2f16 (<2 x i32 > %c , <2 x half > %x ) {
43064390; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
43074391; CI-SAFE: ; %bb.0:
@@ -4704,34 +4788,34 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
47044788}
47054789
47064790define <2 x half > @select_fneg_posk_src_fmad_v2f16 (<2 x i32 > %c , <2 x half > %x , <2 x half > %z ) {
4707- ; CI-SAFE- LABEL: select_fneg_posk_src_fmad_v2f16:
4708- ; CI-SAFE : ; %bb.0:
4709- ; CI-SAFE- NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4710- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v3, v3
4711- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v5, v5
4712- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v2, v2
4713- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v4, v4
4714- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v3, v3
4715- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v5, v5
4716- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v2, v2
4717- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v4, v4
4718- ; CI-SAFE- NEXT: v_mul_f32_e32 v3, 4.0, v3
4719- ; CI-SAFE- NEXT: v_add_f32_e32 v3, v3, v5
4720- ; CI-SAFE- NEXT: v_mul_f32_e32 v2, 4.0, v2
4721- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v3, v3
4722- ; CI-SAFE- NEXT: v_add_f32_e32 v2, v2, v4
4723- ; CI-SAFE- NEXT: v_cvt_f16_f32_e32 v2, v2
4724- ; CI-SAFE- NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4725- ; CI-SAFE- NEXT: v_lshlrev_b32_e32 v3, 16, v3
4726- ; CI-SAFE- NEXT: v_or_b32_e32 v2, v2, v3
4727- ; CI-SAFE- NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4728- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v3, v2
4729- ; CI-SAFE- NEXT: v_lshrrev_b32_e32 v2, 16, v2
4730- ; CI-SAFE- NEXT: v_cvt_f32_f16_e32 v2, v2
4731- ; CI-SAFE- NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4732- ; CI-SAFE- NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4733- ; CI-SAFE- NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4734- ; CI-SAFE- NEXT: s_setpc_b64 s[30:31]
4791+ ; CI-LABEL: select_fneg_posk_src_fmad_v2f16:
4792+ ; CI: ; %bb.0:
4793+ ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4794+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4795+ ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
4796+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4797+ ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
4798+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4799+ ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
4800+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4801+ ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
4802+ ; CI-NEXT: v_mul_f32_e32 v3, 4.0, v3
4803+ ; CI-NEXT: v_add_f32_e32 v3, v3, v5
4804+ ; CI-NEXT: v_mul_f32_e32 v2, 4.0, v2
4805+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4806+ ; CI-NEXT: v_add_f32_e32 v2, v2, v4
4807+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4808+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4809+ ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4810+ ; CI-NEXT: v_or_b32_e32 v2, v2, v3
4811+ ; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4812+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
4813+ ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4814+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4815+ ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4816+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4817+ ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4818+ ; CI-NEXT: s_setpc_b64 s[30:31]
47354819;
47364820; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
47374821; VI-SAFE: ; %bb.0:
@@ -4793,27 +4877,6 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
47934877; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
47944878; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
47954879;
4796- ; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
4797- ; CI-NSZ: ; %bb.0:
4798- ; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4799- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
4800- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
4801- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v4, v4
4802- ; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v5, v5
4803- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
4804- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
4805- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v4, v4
4806- ; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v5, v5
4807- ; CI-NSZ-NEXT: v_mul_f32_e32 v2, -4.0, v2
4808- ; CI-NSZ-NEXT: v_mul_f32_e32 v3, -4.0, v3
4809- ; CI-NSZ-NEXT: v_sub_f32_e32 v2, v2, v4
4810- ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4811- ; CI-NSZ-NEXT: v_sub_f32_e32 v3, v3, v5
4812- ; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4813- ; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4814- ; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4815- ; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
4816- ;
48174880; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
48184881; VI-NSZ: ; %bb.0:
48194882; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4873,6 +4936,112 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
48734936 ret <2 x half > %select
48744937}
48754938
4939+ define <2 x half > @select_fneg_posk_src_fmad_v2f16_nsz (<2 x i32 > %c , <2 x half > %x , <2 x half > %z ) {
4940+ ; CI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4941+ ; CI: ; %bb.0:
4942+ ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4943+ ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4944+ ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4945+ ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
4946+ ; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
4947+ ; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4948+ ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4949+ ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
4950+ ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
4951+ ; CI-NEXT: v_mul_f32_e32 v2, -4.0, v2
4952+ ; CI-NEXT: v_mul_f32_e32 v3, -4.0, v3
4953+ ; CI-NEXT: v_sub_f32_e32 v2, v2, v4
4954+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4955+ ; CI-NEXT: v_sub_f32_e32 v3, v3, v5
4956+ ; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4957+ ; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4958+ ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4959+ ; CI-NEXT: s_setpc_b64 s[30:31]
4960+ ;
4961+ ; VI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4962+ ; VI: ; %bb.0:
4963+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4964+ ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4965+ ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
4966+ ; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
4967+ ; VI-NEXT: v_fma_f16 v1, v4, -4.0, -v1
4968+ ; VI-NEXT: v_fma_f16 v2, v2, -4.0, -v3
4969+ ; VI-NEXT: v_mov_b32_e32 v3, 0x4000
4970+ ; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4971+ ; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
4972+ ; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
4973+ ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4974+ ; VI-NEXT: s_setpc_b64 s[30:31]
4975+ ;
4976+ ; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4977+ ; GFX9: ; %bb.0:
4978+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4979+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4980+ ; GFX9-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
4981+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
4982+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4983+ ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
4984+ ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4985+ ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
4986+ ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
4987+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
4988+ ;
4989+ ; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4990+ ; GFX11-SAFE-TRUE16: ; %bb.0:
4991+ ; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4992+ ; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4993+ ; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
4994+ ; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4995+ ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4996+ ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4997+ ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4998+ ; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
4999+ ;
5000+ ; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
5001+ ; GFX11-SAFE-FAKE16: ; %bb.0:
5002+ ; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5003+ ; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
5004+ ; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5005+ ; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
5006+ ; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
5007+ ; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
5008+ ; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5009+ ; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
5010+ ; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
5011+ ; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
5012+ ; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
5013+ ;
5014+ ; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
5015+ ; GFX11-NSZ-TRUE16: ; %bb.0:
5016+ ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5017+ ; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5018+ ; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
5019+ ; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
5020+ ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5021+ ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
5022+ ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
5023+ ; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
5024+ ;
5025+ ; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
5026+ ; GFX11-NSZ-FAKE16: ; %bb.0:
5027+ ; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5028+ ; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
5029+ ; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5030+ ; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
5031+ ; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
5032+ ; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
5033+ ; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5034+ ; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
5035+ ; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
5036+ ; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
5037+ ; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
5038+ %cmp = icmp eq <2 x i32 > %c , zeroinitializer
5039+ %fmad = call nsz <2 x half > @llvm.fmuladd.v2f16 (<2 x half > %x , <2 x half > <half 4 .0 , half 4 .0 >, <2 x half > %z )
5040+ %fneg = fneg <2 x half > %fmad
5041+ %select = select <2 x i1 > %cmp , <2 x half > %fneg , <2 x half > <half 2 .0 , half 2 .0 >
5042+ ret <2 x half > %select
5043+ }
5044+
48765045declare <2 x half > @llvm.fabs.v2f16 (<2 x half >) #0
48775046declare <2 x half > @llvm.fma.v2f16 (<2 x half >, <2 x half >, <2 x half >) #0
48785047declare <2 x half > @llvm.fmuladd.v2f16 (<2 x half >, <2 x half >, <2 x half >) #0
0 commit comments