Skip to content

Commit 48ceaa5

Browse files
committed
fix tests
1 parent db23bab commit 48ceaa5

File tree

5 files changed

+427
-178
lines changed

5 files changed

+427
-178
lines changed

llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll

Lines changed: 255 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -4148,28 +4148,28 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
41484148
; --------------------------------------------------------------------------------
41494149

41504150
define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
4151-
; CI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
4152-
; CI-SAFE: ; %bb.0:
4153-
; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4154-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
4155-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
4156-
; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4157-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
4158-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
4159-
; CI-SAFE-NEXT: v_add_f32_e32 v3, 4.0, v3
4160-
; CI-SAFE-NEXT: v_add_f32_e32 v2, 4.0, v2
4161-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
4162-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
4163-
; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4164-
; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
4165-
; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4166-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
4167-
; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4168-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
4169-
; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4170-
; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4171-
; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4172-
; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
4151+
; CI-LABEL: select_fneg_posk_src_add_v2f16:
4152+
; CI: ; %bb.0:
4153+
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4154+
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4155+
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4156+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4157+
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4158+
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4159+
; CI-NEXT: v_add_f32_e32 v3, 4.0, v3
4160+
; CI-NEXT: v_add_f32_e32 v2, 4.0, v2
4161+
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4162+
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4163+
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4164+
; CI-NEXT: v_or_b32_e32 v2, v2, v3
4165+
; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4166+
; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
4167+
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4168+
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4169+
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4170+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4171+
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4172+
; CI-NEXT: s_setpc_b64 s[30:31]
41734173
;
41744174
; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
41754175
; VI-SAFE: ; %bb.0:
@@ -4229,21 +4229,6 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
42294229
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
42304230
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
42314231
;
4232-
; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
4233-
; CI-NSZ: ; %bb.0:
4234-
; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4235-
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
4236-
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
4237-
; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4238-
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
4239-
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
4240-
; CI-NSZ-NEXT: v_sub_f32_e32 v2, -4.0, v2
4241-
; CI-NSZ-NEXT: v_sub_f32_e32 v3, -4.0, v3
4242-
; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4243-
; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4244-
; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4245-
; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
4246-
;
42474232
; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
42484233
; VI-NSZ: ; %bb.0:
42494234
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4302,6 +4287,105 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
43024287
ret <2 x half> %select
43034288
}
43044289

4290+
; Codegen test for select(%c == 0, fneg(fadd nsz %x, 4.0), 2.0) on <2 x half>.
; The fadd carries the nsz fast-math flag. In the expected output below the
; fneg is not emitted as a separate sign-bit flip; it is folded into the
; arithmetic. The CI checks expect v_sub_f32 computing (-4.0 - x), the VI
; checks expect v_sub_f16 with a -4.0 / 0xc400 operand, and the GFX9 / GFX11
; checks expect v_pk_add_f16 of -4.0 with the x operand negated through
; neg_lo / neg_hi. The constant 0x4000 in the checks is the half encoding of
; the 2.0 select operand.
; The %y argument is not referenced by the IR body.
; NOTE(review) - presumably this mirrors select_fneg_posk_src_add_v2f16 with
; nsz added on the fadd; confirm against that function's IR body.
; NOTE(review) - the check lines look tool-generated; regenerate them rather
; than editing by hand (confirm against the header of the test file).
define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %y) {
4291+
; CI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4292+
; CI: ; %bb.0:
4293+
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4294+
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4295+
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4296+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4297+
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4298+
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4299+
; CI-NEXT: v_sub_f32_e32 v2, -4.0, v2
4300+
; CI-NEXT: v_sub_f32_e32 v3, -4.0, v3
4301+
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4302+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4303+
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4304+
; CI-NEXT: s_setpc_b64 s[30:31]
4305+
;
4306+
; VI-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4307+
; VI: ; %bb.0:
4308+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4309+
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4310+
; VI-NEXT: v_mov_b32_e32 v1, 0xc400
4311+
; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4312+
; VI-NEXT: v_sub_f16_e32 v2, -4.0, v2
4313+
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
4314+
; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4315+
; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
4316+
; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
4317+
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4318+
; VI-NEXT: s_setpc_b64 s[30:31]
4319+
;
4320+
; GFX9-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4321+
; GFX9: ; %bb.0:
4322+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4323+
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4324+
; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4325+
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
4326+
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4327+
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
4328+
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4329+
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
4330+
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
4331+
; GFX9-NEXT: s_setpc_b64 s[30:31]
4332+
;
4333+
; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4334+
; GFX11-SAFE-TRUE16: ; %bb.0:
4335+
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4336+
; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4337+
; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4338+
; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4339+
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4340+
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4341+
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4342+
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
4343+
;
4344+
; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4345+
; GFX11-SAFE-FAKE16: ; %bb.0:
4346+
; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4347+
; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4348+
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4349+
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
4350+
; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4351+
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
4352+
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
4353+
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
4354+
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4355+
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
4356+
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
4357+
;
4358+
; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4359+
; GFX11-NSZ-TRUE16: ; %bb.0:
4360+
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4361+
; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4362+
; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4363+
; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4364+
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4365+
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4366+
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4367+
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
4368+
;
4369+
; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
4370+
; GFX11-NSZ-FAKE16: ; %bb.0:
4371+
; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4372+
; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
4373+
; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4374+
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
4375+
; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4376+
; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
4377+
; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
4378+
; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
4379+
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
4380+
; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
4381+
; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
4382+
%cmp = icmp eq <2 x i32> %c, zeroinitializer
4383+
%add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
4384+
%fneg = fneg <2 x half> %add
4385+
%select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
4386+
ret <2 x half> %select
4387+
}
4388+
43054389
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
43064390
; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
43074391
; CI-SAFE: ; %bb.0:
@@ -4704,34 +4788,34 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
47044788
}
47054789

47064790
define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
4707-
; CI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
4708-
; CI-SAFE: ; %bb.0:
4709-
; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4710-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
4711-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
4712-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
4713-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
4714-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
4715-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
4716-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
4717-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
4718-
; CI-SAFE-NEXT: v_mul_f32_e32 v3, 4.0, v3
4719-
; CI-SAFE-NEXT: v_add_f32_e32 v3, v3, v5
4720-
; CI-SAFE-NEXT: v_mul_f32_e32 v2, 4.0, v2
4721-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
4722-
; CI-SAFE-NEXT: v_add_f32_e32 v2, v2, v4
4723-
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
4724-
; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4725-
; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4726-
; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
4727-
; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4728-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
4729-
; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4730-
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
4731-
; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4732-
; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4733-
; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4734-
; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
4791+
; CI-LABEL: select_fneg_posk_src_fmad_v2f16:
4792+
; CI: ; %bb.0:
4793+
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4794+
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4795+
; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
4796+
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4797+
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
4798+
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4799+
; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
4800+
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4801+
; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
4802+
; CI-NEXT: v_mul_f32_e32 v3, 4.0, v3
4803+
; CI-NEXT: v_add_f32_e32 v3, v3, v5
4804+
; CI-NEXT: v_mul_f32_e32 v2, 4.0, v2
4805+
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4806+
; CI-NEXT: v_add_f32_e32 v2, v2, v4
4807+
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4808+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4809+
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
4810+
; CI-NEXT: v_or_b32_e32 v2, v2, v3
4811+
; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
4812+
; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
4813+
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4814+
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4815+
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
4816+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4817+
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
4818+
; CI-NEXT: s_setpc_b64 s[30:31]
47354819
;
47364820
; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
47374821
; VI-SAFE: ; %bb.0:
@@ -4793,27 +4877,6 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
47934877
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
47944878
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
47954879
;
4796-
; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
4797-
; CI-NSZ: ; %bb.0:
4798-
; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4799-
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
4800-
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
4801-
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v4, v4
4802-
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v5, v5
4803-
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
4804-
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
4805-
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v4, v4
4806-
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v5, v5
4807-
; CI-NSZ-NEXT: v_mul_f32_e32 v2, -4.0, v2
4808-
; CI-NSZ-NEXT: v_mul_f32_e32 v3, -4.0, v3
4809-
; CI-NSZ-NEXT: v_sub_f32_e32 v2, v2, v4
4810-
; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4811-
; CI-NSZ-NEXT: v_sub_f32_e32 v3, v3, v5
4812-
; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4813-
; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4814-
; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4815-
; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
4816-
;
48174880
; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
48184881
; VI-NSZ: ; %bb.0:
48194882
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4873,6 +4936,112 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
48734936
ret <2 x half> %select
48744937
}
48754938

4939+
; Codegen test for select(%c == 0, fneg(fmuladd nsz(%x, 4.0, %z)), 2.0) on
; <2 x half>. The llvm.fmuladd call carries the nsz fast-math flag. In the
; expected output below the fneg is not emitted as a separate sign-bit flip;
; it is folded into the multiply-add operands. The CI checks expect a multiply
; by -4.0 followed by a subtract of z, the VI checks expect v_fma_f16 with a
; -4.0 multiplier and a negated addend, and the GFX9 / GFX11 checks expect
; v_pk_fma_f16 with a -4.0 multiplier and the addend negated through
; neg_lo / neg_hi. The constant 0x4000 in the checks is the half encoding of
; the 2.0 select operand.
; NOTE(review) - presumably this mirrors select_fneg_posk_src_fmad_v2f16 with
; nsz added on the fmuladd call; confirm against that function's IR body.
; NOTE(review) - the check lines look tool-generated; regenerate them rather
; than editing by hand (confirm against the header of the test file).
define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %z) {
4940+
; CI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4941+
; CI: ; %bb.0:
4942+
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4943+
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
4944+
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
4945+
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
4946+
; CI-NEXT: v_cvt_f16_f32_e32 v5, v5
4947+
; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
4948+
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
4949+
; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
4950+
; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
4951+
; CI-NEXT: v_mul_f32_e32 v2, -4.0, v2
4952+
; CI-NEXT: v_mul_f32_e32 v3, -4.0, v3
4953+
; CI-NEXT: v_sub_f32_e32 v2, v2, v4
4954+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
4955+
; CI-NEXT: v_sub_f32_e32 v3, v3, v5
4956+
; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
4957+
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4958+
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
4959+
; CI-NEXT: s_setpc_b64 s[30:31]
4960+
;
4961+
; VI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4962+
; VI: ; %bb.0:
4963+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4964+
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4965+
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
4966+
; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
4967+
; VI-NEXT: v_fma_f16 v1, v4, -4.0, -v1
4968+
; VI-NEXT: v_fma_f16 v2, v2, -4.0, -v3
4969+
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
4970+
; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4971+
; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
4972+
; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
4973+
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4974+
; VI-NEXT: s_setpc_b64 s[30:31]
4975+
;
4976+
; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4977+
; GFX9: ; %bb.0:
4978+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4979+
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
4980+
; GFX9-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
4981+
; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
4982+
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
4983+
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
4984+
; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4985+
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
4986+
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
4987+
; GFX9-NEXT: s_setpc_b64 s[30:31]
4988+
;
4989+
; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
4990+
; GFX11-SAFE-TRUE16: ; %bb.0:
4991+
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4992+
; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
4993+
; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
4994+
; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
4995+
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4996+
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
4997+
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
4998+
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
4999+
;
5000+
; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
5001+
; GFX11-SAFE-FAKE16: ; %bb.0:
5002+
; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5003+
; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
5004+
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5005+
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
5006+
; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
5007+
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
5008+
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5009+
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
5010+
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
5011+
; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
5012+
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
5013+
;
5014+
; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
5015+
; GFX11-NSZ-TRUE16: ; %bb.0:
5016+
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5017+
; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5018+
; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
5019+
; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
5020+
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5021+
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
5022+
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
5023+
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
5024+
;
5025+
; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
5026+
; GFX11-NSZ-FAKE16: ; %bb.0:
5027+
; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5028+
; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
5029+
; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
5030+
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
5031+
; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
5032+
; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
5033+
; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
5034+
; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
5035+
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
5036+
; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
5037+
; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
5038+
%cmp = icmp eq <2 x i32> %c, zeroinitializer
5039+
%fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
5040+
%fneg = fneg <2 x half> %fmad
5041+
%select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
5042+
ret <2 x half> %select
5043+
}
5044+
48765045
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
48775046
declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
48785047
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0

0 commit comments

Comments
 (0)