From 67c7d1cda01b91a6ab76c50e38e3038e4af9d83c Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Mon, 6 Oct 2025 17:20:53 +0800 Subject: [PATCH 1/4] [DAGCombiner] Remove NoSignedZerosFPMath in visitFNEG --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 204e1f0c75e00..d104d8a2967b6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19301,9 +19301,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't // know it was called from a context with a nsz flag if the input fsub does // not. - if (N0.getOpcode() == ISD::FSUB && - (DAG.getTarget().Options.NoSignedZerosFPMath || - N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() && + N0.hasOneUse()) { return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), N0.getOperand(0)); } From 30b176a8f206d2f813b0e76c545eb76b74aa36dc Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Mon, 6 Oct 2025 19:53:44 +0800 Subject: [PATCH 2/4] fix tests --- llvm/test/CodeGen/AMDGPU/fsub.ll | 5 +- .../AMDGPU/select-fabs-fneg-extract.v2f16.ll | 160 ++++++++++++++---- 2 files changed, 124 insertions(+), 41 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll index 743431c7e0e67..984690fd0ea16 100644 --- a/llvm/test/CodeGen/AMDGPU/fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/fsub.ll @@ -100,7 +100,7 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, %a = load float, ptr addrspace(1) %in, align 4 %b = load float, ptr addrspace(1) %b_ptr, align 4 %result = fsub float %a, %b - %neg.result = fsub float -0.0, %result + %neg.result = fsub nsz float -0.0, %result store float %neg.result, ptr addrspace(1) %out, align 4 ret void } @@ -129,6 +129,3 @@ define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr store float %result, ptr addrspace(1) %out, align 4 ret void } - -attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } -attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll index bb22144b815a1..eb69a53a72c90 100644 --- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll @@ -4387,28 +4387,28 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> % } define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { -; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: -; CI-SAFE: ; %bb.0: -; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3 -; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3 -; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2 -; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc -; CI-SAFE-NEXT: s_setpc_b64 s[30:31] +; CI-LABEL: select_fneg_posk_src_sub_v2f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_add_f32_e32 v3, -4.0, v3 +; CI-NEXT: v_add_f32_e32 v2, -4.0, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_or_b32_e32 v2, v2, v3 +; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v2 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc +; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: ; VI-SAFE: ; %bb.0: @@ -4468,21 +4468,6 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: -; CI-NSZ: ; %bb.0: -; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2 -; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc -; CI-NSZ-NEXT: s_setpc_b64 s[30:31] -; ; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: ; VI-NSZ: ; %bb.0: ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4541,6 +4526,105 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { ret <2 x half> %select } +define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) { +; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2 +; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc +; CI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0x4400 +; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2 +; VI-NEXT: v_mov_b32_e32 v3, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-SAFE-TRUE16: ; %bb.0: +; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-SAFE-FAKE16: ; %bb.0: +; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-NSZ-TRUE16: ; %bb.0: +; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-NSZ-FAKE16: ; %bb.0: +; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] + %cmp = icmp eq <2 x i32> %c, zeroinitializer + %add = fsub <2 x half> %x, + %fneg = fneg nsz <2 x half> %add + %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> + ret <2 x half> %select +} + define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) { ; CI-LABEL: select_fneg_posk_src_mul_v2f16: ; CI: ; %bb.0: @@ -5048,6 +5132,8 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CI-NSZ: {{.*}} +; CI-SAFE: {{.*}} ; GFX11: {{.*}} ; GFX11-NSZ: {{.*}} ; GFX11-SAFE: {{.*}} From e78b8bb69205e9243118b75b0f64cb0c9f0b0507 Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Tue, 7 Oct 2025 13:51:07 +0800 Subject: [PATCH 3/4] remove redundant tests --- llvm/test/CodeGen/AMDGPU/fsub.ll | 35 +++----------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll index 984690fd0ea16..d6a9cb1545f37 100644 --- a/llvm/test/CodeGen/AMDGPU/fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/fsub.ll @@ -92,40 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp ret void } -; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32: -; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} -; SI-NOT: xor -define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { - %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1 - %a = load float, ptr addrspace(1) %in, align 4 - %b = load float, ptr addrspace(1) %b_ptr, align 4 - %result = fsub float %a, %b - %neg.result = fsub nsz float -0.0, %result - store float %neg.result, ptr addrspace(1) %out, align 4 - ret void -} - -; For some reason the attribute has a string "true" or "false", so -; make sure it is disabled and the fneg is not folded if it is not -; "true". -; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32: -; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} -; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] -define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { - %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1 - %a = load float, ptr addrspace(1) %in, align 4 - %b = load float, ptr addrspace(1) %b_ptr, align 4 - %result = fsub float %a, %b - %neg.result = fsub float -0.0, %result - store float %neg.result, ptr addrspace(1) %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32: +; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32: ; SI-NOT: v_sub -define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +define amdgpu_kernel void @v_fsub_0_nsz_flag_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) { %a = load float, ptr addrspace(1) %in, align 4 - %result = fsub float %a, 0.0 + %result = fsub nsz float %a, 0.0 store float %result, ptr addrspace(1) %out, align 4 ret void } From 01f373b5d902cc76e4894b33aedd36e9c3c96ab9 Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Tue, 7 Oct 2025 14:10:13 +0800 Subject: [PATCH 4/4] remove redundant test runs --- .../AMDGPU/select-fabs-fneg-extract.v2f16.ll | 4004 ++++++----------- 1 file changed, 1327 insertions(+), 2677 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll index eb69a53a72c90..9814ed80befbf 100644 --- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll @@ -1,15 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s -; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s - -; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s -; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI %s +; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) { ; CI-LABEL: add_select_fabs_fabs_v2f16: @@ -63,69 +57,37 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -198,73 +160,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v1, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -328,73 +256,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -469,73 +363,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v2, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -597,63 +457,34 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_var_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_var_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y @@ -709,61 +540,33 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> @@ -815,61 +618,33 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) ; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select) @@ -920,61 +695,33 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_posk_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_posk_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %add = fadd <2 x half> %select, %x @@ -1029,61 +776,33 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negk_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negk_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fabs @@ -1140,61 +859,33 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fabs @@ -1250,61 +941,33 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> @@ -1360,61 +1023,33 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_posk_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_posk_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fabs @@ -1470,57 +1105,31 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1587,61 +1196,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1705,61 +1286,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1828,61 +1381,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1948,63 +1473,34 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_var_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_var_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y @@ -2058,55 +1554,30 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> @@ -2161,55 +1632,30 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> @@ -2264,55 +1710,30 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x, ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> @@ -2363,61 +1784,33 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negk_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negk_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %add = fadd <2 x half> %select, %x @@ -2469,61 +1862,33 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %add = fadd <2 x half> %select, %x @@ -2573,61 +1938,33 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) ; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fneg.x = fneg <2 x half> %select @@ -2681,55 +2018,30 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negk_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negk_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fneg.x @@ -2783,55 +2095,30 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> @@ -2885,55 +2172,30 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_posk_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_posk_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> , <2 x half> %fneg.x @@ -2997,69 +2259,37 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negfabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negfabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3125,69 +2355,37 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -3253,69 +2451,37 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_neg_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_neg_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -3380,69 +2546,37 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_neg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_neg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.y = fneg <2 x half> %y @@ -3501,63 +2635,34 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_neg_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_neg_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -3617,63 +2722,34 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negfabs_neg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negfabs_neg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3735,61 +2811,33 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_negfabs_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_negfabs_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3850,61 +2898,33 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_posk_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_posk_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3965,61 +2985,33 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_negfabs_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_negfabs_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -4080,61 +3072,33 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_negk_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_negk_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -4171,115 +3135,63 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, < ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x4400 -; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SAFE-NEXT: v_add_f16_e32 v2, 4.0, v2 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0xc400 -; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NSZ-NEXT: v_sub_f16_e32 v2, -4.0, v2 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_add_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0x4400 +; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_add_f16_e32 v2, 4.0, v2 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_add_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fadd <2 x half> %x, %fneg = fneg <2 x half> %add @@ -4330,55 +3242,30 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> % ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fadd nsz <2 x half> %x, %fneg = fneg <2 x half> %add @@ -4410,115 +3297,63 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0xc400 -; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SAFE-NEXT: v_add_f16_e32 v2, -4.0, v2 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x4400 -; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NSZ-NEXT: v_sub_f16_e32 v2, 4.0, v2 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_sub_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0xc400 +; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_add_f16_e32 v2, -4.0, v2 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_sub_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fsub <2 x half> %x, %fneg = fneg <2 x half> %add @@ -4569,55 +3404,30 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> % ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fsub <2 x half> %x, %fneg = fneg nsz <2 x half> %add @@ -4668,55 +3478,30 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %mul = fmul <2 x half> %x, %fneg = fneg <2 x half> %mul @@ -4752,118 +3537,65 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, < ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1 -; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1 -; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_fma_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_fma_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> , <2 x half> %z) %fneg = fneg <2 x half> %fma @@ -4901,118 +3633,65 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1 -; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1 -; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_fmad_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> , <2 x half> %z) %fneg = fneg <2 x half> %fmad @@ -5070,55 +3749,30 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half> ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> , <2 x half> %z) %fneg = fneg <2 x half> %fmad @@ -5132,8 +3786,4 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CI-NSZ: {{.*}} -; CI-SAFE: {{.*}} ; GFX11: {{.*}} -; GFX11-NSZ: {{.*}} -; GFX11-SAFE: {{.*}}